diff options
-rw-r--r-- | sbin/ipfw/Makefile | 2 | ||||
-rw-r--r-- | sbin/ipfw/altq.c | 6 | ||||
-rw-r--r-- | sbin/ipfw/dummynet.c | 41 | ||||
-rw-r--r-- | sbin/ipfw/ipfw.8 | 343 | ||||
-rw-r--r-- | sbin/ipfw/ipfw2.c | 2173 | ||||
-rw-r--r-- | sbin/ipfw/ipfw2.h | 67 | ||||
-rw-r--r-- | sbin/ipfw/ipv6.c | 54 | ||||
-rw-r--r-- | sbin/ipfw/main.c | 4 | ||||
-rw-r--r-- | sbin/ipfw/tables.c | 1781 | ||||
-rw-r--r-- | sys/conf/files | 2 | ||||
-rw-r--r-- | sys/modules/ipfw/Makefile | 15 | ||||
-rw-r--r-- | sys/netinet/ip_fw.h | 352 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw2.c | 144 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_dynamic.c | 165 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_iface.c | 526 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_private.h | 277 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_sockopt.c | 2540 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_table.c | 3523 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_table.h | 198 | ||||
-rw-r--r-- | sys/netpfil/ipfw/ip_fw_table_algo.c | 3549 |
20 files changed, 13826 insertions, 1936 deletions
diff --git a/sbin/ipfw/Makefile b/sbin/ipfw/Makefile index de27e3e..9eb4511 100644 --- a/sbin/ipfw/Makefile +++ b/sbin/ipfw/Makefile @@ -3,7 +3,7 @@ .include <src.opts.mk> PROG= ipfw -SRCS= ipfw2.c dummynet.c ipv6.c main.c nat.c +SRCS= ipfw2.c dummynet.c ipv6.c main.c nat.c tables.c WARNS?= 2 .if ${MK_PF} != "no" diff --git a/sbin/ipfw/altq.c b/sbin/ipfw/altq.c index 8dced11..8398ab6 100644 --- a/sbin/ipfw/altq.c +++ b/sbin/ipfw/altq.c @@ -137,15 +137,15 @@ altq_qid_to_name(u_int32_t qid) } void -print_altq_cmd(ipfw_insn_altq *altqptr) +print_altq_cmd(struct buf_pr *bp, ipfw_insn_altq *altqptr) { if (altqptr) { const char *qname; qname = altq_qid_to_name(altqptr->qid); if (qname == NULL) - printf(" altq ?<%u>", altqptr->qid); + bprintf(bp, " altq ?<%u>", altqptr->qid); else - printf(" altq %s", qname); + bprintf(bp, " altq %s", qname); } } diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c index cb62853..dc95a19 100644 --- a/sbin/ipfw/dummynet.c +++ b/sbin/ipfw/dummynet.c @@ -174,48 +174,44 @@ print_header(struct ipfw_flow_id *id) } static void -list_flow(struct dn_flow *ni, int *print) +list_flow(struct buf_pr *bp, struct dn_flow *ni) { char buff[255]; struct protoent *pe = NULL; struct in_addr ina; struct ipfw_flow_id *id = &ni->fid; - if (*print) { - print_header(&ni->fid); - *print = 0; - } pe = getprotobynumber(id->proto); /* XXX: Should check for IPv4 flows */ - printf("%3u%c", (ni->oid.id) & 0xff, + bprintf(bp, "%3u%c", (ni->oid.id) & 0xff, id->extra ? 
'*' : ' '); if (!IS_IP6_FLOW_ID(id)) { if (pe) - printf("%-4s ", pe->p_name); + bprintf(bp, "%-4s ", pe->p_name); else - printf("%4u ", id->proto); + bprintf(bp, "%4u ", id->proto); ina.s_addr = htonl(id->src_ip); - printf("%15s/%-5d ", + bprintf(bp, "%15s/%-5d ", inet_ntoa(ina), id->src_port); ina.s_addr = htonl(id->dst_ip); - printf("%15s/%-5d ", + bprintf(bp, "%15s/%-5d ", inet_ntoa(ina), id->dst_port); } else { /* Print IPv6 flows */ if (pe != NULL) - printf("%9s ", pe->p_name); + bprintf(bp, "%9s ", pe->p_name); else - printf("%9u ", id->proto); - printf("%7d %39s/%-5d ", id->flow_id6, + bprintf(bp, "%9u ", id->proto); + bprintf(bp, "%7d %39s/%-5d ", id->flow_id6, inet_ntop(AF_INET6, &(id->src_ip6), buff, sizeof(buff)), id->src_port); - printf(" %39s/%-5d ", + bprintf(bp, " %39s/%-5d ", inet_ntop(AF_INET6, &(id->dst_ip6), buff, sizeof(buff)), id->dst_port); } - pr_u64(&ni->tot_pkts, 4); - pr_u64(&ni->tot_bytes, 8); - printf("%2u %4u %3u\n", + pr_u64(bp, &ni->tot_pkts, 4); + pr_u64(bp, &ni->tot_bytes, 8); + bprintf(bp, "%2u %4u %3u", ni->length, ni->len_bytes, ni->drops); } @@ -303,8 +299,10 @@ list_pipes(struct dn_id *oid, struct dn_id *end) { char buf[160]; /* pending buffer */ int toPrint = 1; /* print header */ + struct buf_pr bp; buf[0] = '\0'; + bp_alloc(&bp, 4096); for (; oid != end; oid = O_NEXT(oid, oid->len)) { if (oid->len < sizeof(*oid)) errx(1, "invalid oid len %d\n", oid->len); @@ -346,7 +344,12 @@ list_pipes(struct dn_id *oid, struct dn_id *end) break; case DN_FLOW: - list_flow((struct dn_flow *)oid, &toPrint); + if (toPrint != 0) { + print_header(&((struct dn_flow *)oid)->fid); + toPrint = 0; + } + list_flow(&bp, (struct dn_flow *)oid); + printf("%s\n", bp.buf); break; case DN_LINK: { @@ -384,6 +387,8 @@ list_pipes(struct dn_id *oid, struct dn_id *end) } flush_buf(buf); // XXX does it really go here ? 
} + + bp_free(&bp); } /* diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8 index 5b56b39..9647697 100644 --- a/sbin/ipfw/ipfw.8 +++ b/sbin/ipfw/ipfw.8 @@ -1,7 +1,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 31, 2014 +.Dd Aug 13, 2014 .Dt IPFW 8 .Os .Sh NAME @@ -48,17 +48,43 @@ in-kernel NAT. .Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive .Ss LOOKUP TABLES .Nm -.Cm table Ar number Cm add Ar addr Ns Oo / Ns Ar masklen Oc Op Ar value +.Oo Cm set Ar N Oc Cm table Ar name Cm create Ar create-options .Nm -.Cm table Ar number Cm delete Ar addr Ns Op / Ns Ar masklen +.Oo Cm set Ar N Oc Cm table Ar name Cm destroy .Nm -.Cm table -.Brq Ar number | all -.Cm flush +.Oo Cm set Ar N Oc Cm table Ar name Cm modify Ar modify-options +.Nm +.Oo Cm set Ar N Oc Cm table Ar name Cm swap Ar name +.Nm +.Oo Cm set Ar N Oc Cm table Ar name Cm add Ar table-key Op Ar value +.Nm +.Oo Cm set Ar N Oc Cm table Ar name Cm add Op Ar table-key Ar value ... +.Nm +.Oo Cm set Ar N Oc Cm table Ar name Cm atomic add Op Ar table-key Ar value ... .Nm -.Cm table -.Brq Ar number | all +.Oo Cm set Ar N Oc Cm table Ar name Cm delete Op Ar table-key ... +.Nm +.Oo Cm set Ar N Oc Cm table Ar name Cm lookup Ar addr +.Nm +.Oo Cm set Ar N Oc Cm table Ar name Cm lock +.Nm +.Oo Cm set Ar N Oc Cm table Ar name Cm unlock +.Nm +.Oo Cm set Ar N Oc Cm table +.Brq Ar name | all .Cm list +.Nm +.Oo Cm set Ar N Oc Cm table +.Brq Ar name | all +.Cm info +.Nm +.Oo Cm set Ar N Oc Cm table +.Brq Ar name | all +.Cm detail +.Nm +.Oo Cm set Ar N Oc Cm table +.Brq Ar name | all +.Cm flush .Ss DUMMYNET CONFIGURATION (TRAFFIC SHAPER AND PACKET SCHEDULER) .Nm .Brq Cm pipe | queue | sched @@ -822,10 +848,11 @@ It is possible to use the .Cm tablearg keyword with a skipto for a .Em computed -skipto, but care should be used, as no destination caching -is possible in this case so the rules are always walked to find it, -starting from the -.Cm skipto . +skipto. 
Skipto may work either in O(log(N)) or in O(1) depending +on amount of memory and/or sysctl variables. +See the +.Sx SYSCTL VARIABLES +section for more details. .It Cm call Ar number | tablearg The current rule number is saved in the internal stack and ruleset processing continues with the first rule numbered @@ -1152,7 +1179,7 @@ with multiple addresses) is provided for convenience only and its use is discouraged. .It Ar addr : Oo Cm not Oc Bro .Cm any | me | me6 | -.Cm table Ns Pq Ar number Ns Op , Ns Ar value +.Cm table Ns Pq Ar name Ns Op , Ns Ar value .Ar | addr-list | addr-set .Brc .Bl -tag -width indent @@ -1164,8 +1191,8 @@ matches any IP address configured on an interface in the system. matches any IPv6 address configured on an interface in the system. The address list is evaluated at the time the packet is analysed. -.It Cm table Ns Pq Ar number Ns Op , Ns Ar value -Matches any IPv4 address for which an entry exists in the lookup table +.It Cm table Ns Pq Ar name Ns Op , Ns Ar value +Matches any IPv4 or IPv6 address for which an entry exists in the lookup table .Ar number . If an optional 32-bit unsigned .Ar value @@ -1359,6 +1386,19 @@ and IPsec encapsulated security payload headers .It Cm fib Ar fibnum Matches a packet that has been tagged to use the given FIB (routing table) number. +.It Cm flow Ar table Ns Pq Ar name Ns Op , Ns Ar value +Search for the flow entry in lookup table +.Ar name . +If not found, the match fails. +Otherwise, the match succeeds and +.Cm tablearg +is set to the value extracted from the table. +.Pp +This option can be useful to quickly dispatch traffic based on +certain packet fields. +See the +.Sx LOOKUP TABLES +section below for more information on lookup tables. .It Cm flow-id Ar labels Matches IPv6 packets containing any of the flow labels given in .Ar labels . @@ -1550,9 +1590,9 @@ of source and destination addresses and ports can be specified. Currently, only IPv4 flows are supported. 
-.It Cm lookup Bro Cm dst-ip | dst-port | src-ip | src-port | uid | jail Brc Ar N +.It Cm lookup Bro Cm dst-ip | dst-port | src-ip | src-port | uid | jail Brc Ar name Search an entry in lookup table -.Ar N +.Ar name that matches the field specified as argument. If not found, the match fails. Otherwise, the match succeeds and @@ -1616,13 +1656,19 @@ and they are always printed as hexadecimal (unless the option is used, in which case symbolic resolution will be attempted). .It Cm proto Ar protocol Matches packets with the corresponding IP protocol. -.It Cm recv | xmit | via Brq Ar ifX | Ar if Ns Cm * | Ar table Ns Pq Ar number Ns Op , Ns Ar value | Ar ipno | Ar any +.It Cm recv | xmit | via Brq Ar ifX | Ar if Ns Cm * | Ar table Ns Po Ar name Ns Oo , Ns Ar value Oc Pc | Ar ipno | Ar any Matches packets received, transmitted or going through, respectively, the interface specified by exact name .Po Ar ifX Pc , by device name .Po Ar if* Pc , by IP address, or through some interface. +Table +.Ar name +may be used to match interface by its kernel ifindex. +See the +.Sx LOOKUP TABLES +section below for more information on lookup tables. .Pp The .Cm via @@ -1817,15 +1863,35 @@ connected networks instead of all source addresses. .Sh LOOKUP TABLES Lookup tables are useful to handle large sparse sets of addresses or other search keys (e.g., ports, jail IDs, interface names). -In the rest of this section we will use the term ``address''. -There may be up to 65535 different lookup tables, numbered 0 to 65534. +In the rest of this section we will use the term ``key''. +Table name needs to match the following spec: +.Ar table-name . +Tables with the same name can be created in different +.Ar sets . +However, rule links to the tables in +.Ar set 0 +by default. +This behavior can be controlled by +.Va net.inet.ip.fw.tables_sets +variable. +See the +.Sx SETS OF RULES +section for more information. +There may be up to 65535 different lookup tables. 
.Pp +The following table types are supported: +.Bl -tag -width indent +.It Ar table-type : Ar cidr | iface | number | flow +.It Ar table-key : Ar addr Ns Oo / Ns Ar masklen Oc | iface-name | number | flow-spec +.It Ar flow-spec : Ar flow-field Ns Op , Ns Ar flow-spec +.It Ar flow-field : src-ip | proto | src-port | dst-ip | dst-port +.It Cm cidr +matches IPv4 or IPv6 address. Each entry is represented by an .Ar addr Ns Op / Ns Ar masklen and will match all addresses with base .Ar addr -(specified as an IPv4/IPv6 address, a hostname or an unsigned integer) -and mask width of +(specified as an IPv4/IPv6 address, or a hostname) and mask width of .Ar masklen bits. If @@ -1833,29 +1899,159 @@ If is not specified, it defaults to 32 for IPv4 and 128 for IPv6. When looking up an IP address in a table, the most specific entry will match. -Associated with each entry is a 32-bit unsigned -.Ar value , -which can optionally be checked by a rule matching code. -When adding an entry, if -.Ar value -is not specified, it defaults to 0. -.Pp -An entry can be added to a table -.Pq Cm add , -or removed from a table -.Pq Cm delete . -A table can be examined -.Pq Cm list -or flushed -.Pq Cm flush . -.Pp -Internally, each table is stored in a Radix tree, the same way as -the routing table (see -.Xr route 4 ) . +.It Cm iface +matches interface names. +Each entry is represented by string treated as interface name. +Wildcards are not supported. +.It Cm number +matches protocol ports, uids/gids or jail IDs. +Each entry is represented by 32-bit unsigned integer. +Ranges are not supported. +.It Cm flow +Matches packet fields specified by +.Ar flow +type suboptions with table entries. +.El +.Pp +The following value format types are supported: +.Bl -tag -width indent +.It Ar value-ftype : Ar number | ip +.It Cm number +Default for +.Ar number +value type. +Shows values as unsigned integer. +.It Cm ip +Show values as IPv4 addresses. 
+.El +.Pp +Tables require explicit creation via +.Cm create +before use. +.Pp +The following creation options are supported: +.Bl -tag -width indent +.It Ar create-options : Ar create-option | create-options +.It Ar create-option : Cm type Ar table-type | Cm ftype Ar value-ftype | Cm algo Ar algo-desc | +.Cm limit Ar number | Cm locked +.It Cm type +Table key type. +.It Cm ftype +Table value format type. +Affects userland formatting only. +.It Cm algo +Table algorithm to use (see below). +.It Cm limit +Maximum number of items that may be inserted into table. +.It Cm locked +Restrict any table modifications. +.El +.Pp +Some of these options may be modified later via +.Cm modify +keyword. +The following options can be changed: +.Bl -tag -width indent +.It Ar modify-options : Ar modify-option | modify-options +.It Ar modify-option : Cm ftype Ar value-ftype | Cm limit Ar number +.It Cm ftype +Set table value format type. +Affects userland formatting only. +.It Cm limit +Alter maximum number of items that may be inserted into table. +.El +.Pp +Additionally, table can be locked or unlocked using +.Cm lock +or +.Cm unlock +commands. +.Pp +Tables of the same +.Ar type +and +.Ar valtype +can be swapped with each other using +.Cm swap Ar name +command. +Swap may fail if tables limits are set and data exchange +would result in limits hit. +Operation is performed atomically. +.Pp +One or more entries can be added to a table at once using +.Cm add +command. +Addition of all items are performed atomically. +By default, error in addition of one entry does not influence +addition of other entries. However, non-zero error code is returned +in that case. +Special +.Cm atomic +keyword may be specified before +.Cm add +to indicate all-or-none add request. +.Pp +One or more entries can be removed from a table at once using +.Cm delete +command. +By default, error in removal of one entry does not influence +removing of other entries. 
However, non-zero error code is returned +in that case. +.Pp +It may be possible to check what entry will be found on particular +.Ar table-key +using +.Cm lookup +.Ar table-key +command. +This functionality is optional and may be unsupported in some algorithms. +.Pp +The following operations can be performed on +.Ar one +or +.Cm all +tables: +.Bl -tag -width indent +.It Cm list +List all entries. +.It Cm flush +Removes all entries. +.It Cm info +Shows generic table information. +.It Cm detail +Shows generic table information and algo-specific data. +.El .Pp -Lookup tables currently support only ports, jail IDs, IPv4/IPv6 addresses -and interface names. -Wildcards is not supported for interface names. +The following lookup algorithms are supported: +.Bl -tag -width indent +.It Ar algo-desc : algo-name | "algo-name algo-data" +.It Ar algo-name: Ar cidr:radix | cidr:hash | iface:array | number:array | flow:hash +.It Cm cidr:radix +Separate Radix trees for IPv4 and IPv6, the same way as the routing table (see +.Xr route 4 ) . +Default choice for +.Ar +cidr +type. +.It Cm cidr:hash +Separate auto-growing hashes for IPv4 and IPv6. +Accepts entries with the same mask length specified initially via +.Cm "cidr:hash masks=/v4,/v6" +algorithm creation options. +Assume /32 and /128 masks by default. +Search removes host bits (according to mask) from supplied address and checks +resulting key in appropriate hash. +Mostly optimized for /64 and byte-ranged IPv6 masks. +.It Cm iface:array +Array storing sorted indexes for entries which are presented in the system. +Optimized for very fast lookup. +.It Cm number:array +Array storing sorted u32 numbers. +.It Cm flow:hash +Auto-growing hash storing flow entries. +Search calculates hash on required packet fields and searches for matching +entries in selected bucket. +.El .Pp The .Cm tablearg @@ -1864,6 +2060,15 @@ the argument for a rule action, action parameter or rule option. 
This can significantly reduce number of rules in some configurations. If two tables are used in a rule, the result of the second (destination) is used. +.Pp +The following value types are supported: +.Bl -tag -width indent +.It Ar value-type : Ar number +.It Cm number +Default value type. +If value is not specified, defaults to 0. +.El +.Pp The .Cm tablearg argument can be used with the following actions: @@ -1888,17 +2093,25 @@ up to a rule equal to, or past, the given number, and should therefore try keep the ruleset compact between the skipto and the target rules. .Sh SETS OF RULES -Each rule belongs to one of 32 different +Each rule or table belongs to one of 32 different .Em sets , numbered 0 to 31. Set 31 is reserved for the default rule. .Pp -By default, rules are put in set 0, unless you use the +By default, rules or tables are put in set 0, unless you use the .Cm set N -attribute when entering a new rule. +attribute when adding a new rule or table. Sets can be individually and atomically enabled or disabled, so this mechanism permits an easy way to store multiple configurations of the firewall and quickly (and atomically) switch between them. +.Pp +By default, tables from set 0 are referenced when adding rule with +table opcodes regardless of rule set. +This behavior can be changed by setting +.Va net.inet.ip.fw.tables_sets +variable to 1. +Rule's set will then be used for table references. +.Pp The command to enable/disable sets is .Bd -ragged -offset indent .Nm @@ -3220,30 +3433,42 @@ Then we classify traffic using a single rule: .Dl "ipfw pipe 1 config bw 1000Kbyte/s" .Dl "ipfw pipe 4 config bw 4000Kbyte/s" .Dl "..." -.Dl "ipfw table 1 add 192.168.2.0/24 1" -.Dl "ipfw table 1 add 192.168.0.0/27 4" -.Dl "ipfw table 1 add 192.168.0.2 1" +.Dl "ipfw table T1 create type cidr" +.Dl "ipfw table T1 add 192.168.2.0/24 1" +.Dl "ipfw table T1 add 192.168.0.0/27 4" +.Dl "ipfw table T1 add 192.168.0.2 1" .Dl "..." 
-.Dl "ipfw add pipe tablearg ip from table(1) to any" +.Dl "ipfw add pipe tablearg ip from 'table(T1)' to any" .Pp Using the .Cm fwd action, the table entries may include hostnames and IP addresses. .Pp -.Dl "ipfw table 1 add 192.168.2.0/24 10.23.2.1" -.Dl "ipfw table 1 add 192.168.0.0/27 router1.dmz" +.Dl "ipfw table T2 create type cidr ftype ip" +.Dl "ipfw table T2 add 192.168.2.0/24 10.23.2.1" +.Dl "ipfw table T2 add 192.168.0.0/27 router1.dmz" .Dl "..." .Dl "ipfw add 100 fwd tablearg ip from any to table(1)" .Pp In the following example per-interface firewall is created: .Pp -.Dl "ipfw table 10 add vlan20 12000" -.Dl "ipfw table 10 add vlan30 13000" -.Dl "ipfw table 20 add vlan20 22000" -.Dl "ipfw table 20 add vlan30 23000" +.Dl "ipfw table IN create type iface" +.Dl "ipfw table IN add vlan20 12000" +.Dl "ipfw table IN add vlan30 13000" +.Dl "ipfw table OUT create type iface" +.Dl "ipfw table OUT add vlan20 22000" +.Dl "ipfw table OUT add vlan30 23000" +.Dl ".." +.Dl "ipfw add 100 ipfw skipto tablearg ip from any to any recv 'table(IN)' in" +.Dl "ipfw add 200 ipfw skipto tablearg ip from any to any xmit 'table(OUT)' out" +.Pp +The following example illustrates usage of flow tables: +.Pp +.Dl "ipfw table fl create type flow:src-ip,proto,dst-ip,dst-port" +.Dl "ipfw table fl add 2a02:6b8:77::88,tcp,2a02:6b8:77::99,80 11" +.Dl "ipfw table fl add 10.0.0.1,udp,10.0.0.2,53 12" .Dl ".." 
-.Dl "ipfw add 100 ipfw skipto tablearg ip from any to any recv 'table(10)' in" -.Dl "ipfw add 200 ipfw skipto tablearg ip from any to any xmit 'table(10)' out" +.Dl "ipfw add 100 allow ip from any to any flow 'table(fl,11)' recv ix0" .Ss SETS OF RULES To add a set of rules atomically, e.g.\& set 18: .Pp diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index 25d6afd..deb7522 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -35,6 +35,7 @@ #include <netdb.h> #include <pwd.h> #include <stdio.h> +#include <stdarg.h> #include <stdlib.h> #include <string.h> #include <sysexits.h> @@ -56,16 +57,22 @@ struct cmdline_opts co; /* global options */ +struct format_opts { + int bcwidth; + int pcwidth; + int show_counters; + uint32_t set_mask; /* enabled sets mask */ + uint32_t flags; /* request flags */ + uint32_t first; /* first rule to request */ + uint32_t last; /* last rule to request */ + uint32_t dcnt; /* number of dynamic states */ + ipfw_obj_ctlv *tstate; /* table state data */ +}; + int resvd_set_number = RESVD_SET; int ipfw_socket = -1; -uint32_t ipfw_tables_max = 0; /* Number of tables supported by kernel */ - -#ifndef s6_addr32 -#define s6_addr32 __u6_addr.__u6_addr32 -#endif - #define CHECK_LENGTH(v, len) do { \ if ((v) < (len)) \ errx(EX_DATAERR, "Rule too long"); \ @@ -86,7 +93,7 @@ uint32_t ipfw_tables_max = 0; /* Number of tables supported by kernel */ if (!av[0]) \ errx(EX_USAGE, "%s: missing argument", match_value(s_x, tok)); \ if (_substrcmp(*av, "tablearg") == 0) { \ - arg = IP_FW_TABLEARG; \ + arg = IP_FW_TARG; \ break; \ } \ \ @@ -104,24 +111,13 @@ uint32_t ipfw_tables_max = 0; /* Number of tables supported by kernel */ errx(EX_DATAERR, "%s: argument is out of range (%u..%u): %s", \ match_value(s_x, tok), min, max, *av); \ \ - if (_xval == IP_FW_TABLEARG) \ + if (_xval == IP_FW_TARG) \ errx(EX_DATAERR, "%s: illegal argument value: %s", \ match_value(s_x, tok), *av); \ arg = _xval; \ } \ } while (0) -static void -PRINT_UINT_ARG(const char *str, 
uint32_t arg) -{ - if (str != NULL) - printf("%s",str); - if (arg == IP_FW_TABLEARG) - printf("tablearg"); - else - printf("%u", arg); -} - static struct _s_x f_tcpflags[] = { { "syn", TH_SYN }, { "fin", TH_FIN }, @@ -169,7 +165,7 @@ static struct _s_x f_iptos[] = { { NULL, 0 } }; -static struct _s_x f_ipdscp[] = { +struct _s_x f_ipdscp[] = { { "af11", IPTOS_DSCP_AF11 >> 2 }, /* 001010 */ { "af12", IPTOS_DSCP_AF12 >> 2 }, /* 001100 */ { "af13", IPTOS_DSCP_AF13 >> 2 }, /* 001110 */ @@ -357,6 +353,7 @@ static struct _s_x rule_options[] = { { "src-ipv6", TOK_SRCIP6}, { "src-ip6", TOK_SRCIP6}, { "lookup", TOK_LOOKUP}, + { "flow", TOK_FLOW}, { "//", TOK_COMMENT }, { "not", TOK_NOT }, /* pseudo option */ @@ -370,6 +367,103 @@ static struct _s_x rule_options[] = { { NULL, 0 } /* terminator */ }; +void bprint_uint_arg(struct buf_pr *bp, const char *str, uint32_t arg); +static int ipfw_get_config(struct cmdline_opts *co, struct format_opts *fo, + ipfw_cfg_lheader **pcfg, size_t *psize); +static int ipfw_show_config(struct cmdline_opts *co, struct format_opts *fo, + ipfw_cfg_lheader *cfg, size_t sz, int ac, char **av); +static void ipfw_list_tifaces(void); + +/* + * Simple string buffer API. + * Used to simplify buffer passing between function and for + * transparent overrun handling. + */ + +/* + * Allocates new buffer of given size @sz. + * + * Returns 0 on success. + */ +int +bp_alloc(struct buf_pr *b, size_t size) +{ + memset(b, 0, sizeof(struct buf_pr)); + + if ((b->buf = calloc(1, size)) == NULL) + return (ENOMEM); + + b->ptr = b->buf; + b->size = size; + b->avail = b->size; + + return (0); +} + +void +bp_free(struct buf_pr *b) +{ + + free(b->buf); +} + +/* + * Flushes buffer so new writer start from beginning. + */ +void +bp_flush(struct buf_pr *b) +{ + + b->ptr = b->buf; + b->avail = b->size; +} + +/* + * Print message specified by @format and args. + * Automatically manage buffer space and transparently handle + * buffer overruns. 
+ * + * Returns number of bytes that should have been printed. + */ +int +bprintf(struct buf_pr *b, char *format, ...) +{ + va_list args; + int i; + + va_start(args, format); + + i = vsnprintf(b->ptr, b->avail, format, args); + va_end(args); + + if (i > b->avail || i < 0) { + /* Overflow or print error */ + b->avail = 0; + } else { + b->ptr += i; + b->avail -= i; + } + + b->needed += i; + + return (i); +} + +/* + * Special values printer for tablearg-aware opcodes. + */ +void +bprint_uint_arg(struct buf_pr *bp, const char *str, uint32_t arg) +{ + + if (str != NULL) + bprintf(bp, "%s", str); + if (arg == IP_FW_TARG) + bprintf(bp, "tablearg"); + else + bprintf(bp, "%u", arg); +} + /* * Helper routine to print a possibly unaligned uint64_t on * various platform. If width > 0, print the value with @@ -377,7 +471,7 @@ static struct _s_x rule_options[] = { * otherwise, return the required width. */ int -pr_u64(uint64_t *pd, int width) +pr_u64(struct buf_pr *b, uint64_t *pd, int width) { #ifdef TCC #define U64_FMT "I64" @@ -390,11 +484,12 @@ pr_u64(uint64_t *pd, int width) bcopy (pd, &u, sizeof(u)); d = u; return (width > 0) ? - printf("%*" U64_FMT " ", width, d) : + bprintf(b, "%*" U64_FMT " ", width, d) : snprintf(NULL, 0, "%" U64_FMT, d) ; #undef U64_FMT } + void * safe_calloc(size_t number, size_t size) { @@ -416,6 +511,26 @@ safe_realloc(void *ptr, size_t size) } /* + * Compare things like interface or table names. + */ +int +stringnum_cmp(const char *a, const char *b) +{ + int la, lb; + + la = strlen(a); + lb = strlen(b); + + if (la > lb) + return (1); + else if (la < lb) + return (-01); + + return (strcmp(a, b)); +} + + +/* * conditionally runs the command. 
* Selected options or negative -> getsockopt */ @@ -448,20 +563,50 @@ do_cmd(int optname, void *optval, uintptr_t optlen) } /* - * do_setcmd3 - pass ipfw control cmd to kernel + * do_set3 - pass ipfw control cmd to kernel * @optname: option name * @optval: pointer to option data * @optlen: option length * - * Function encapsulates option value in IP_FW3 socket option - * and calls setsockopt(). - * Function returns 0 on success or -1 otherwise. + * Assumes op3 header is already embedded. + * Calls setsockopt() with IP_FW3 as kernel-visible opcode. + * Returns 0 on success or errno otherwise. */ -static int -do_setcmd3(int optname, void *optval, socklen_t optlen) +int +do_set3(int optname, ip_fw3_opheader *op3, uintptr_t optlen) { - socklen_t len; - ip_fw3_opheader *op3; + int errno; + + if (co.test_only) + return (0); + + if (ipfw_socket == -1) + ipfw_socket = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + if (ipfw_socket < 0) + err(EX_UNAVAILABLE, "socket"); + + op3->opcode = optname; + + if (setsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, optlen) != 0) + return (errno); + + return (0); +} + +/* + * do_get3 - pass ipfw control cmd to kernel + * @optname: option name + * @optval: pointer to option data + * @optlen: pointer to option length + * + * Assumes op3 header is already embedded. + * Calls getsockopt() with IP_FW3 as kernel-visible opcode. + * Returns 0 on success or errno otherwise. 
+ */ +int +do_get3(int optname, ip_fw3_opheader *op3, size_t *optlen) +{ + int error; if (co.test_only) return (0); @@ -471,14 +616,17 @@ do_setcmd3(int optname, void *optval, socklen_t optlen) if (ipfw_socket < 0) err(EX_UNAVAILABLE, "socket"); - len = sizeof(ip_fw3_opheader) + optlen; - op3 = alloca(len); - /* Zero reserved fields */ - memset(op3, 0, sizeof(ip_fw3_opheader)); - memcpy(op3 + 1, optval, optlen); op3->opcode = optname; - return setsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, len); + error = getsockopt(ipfw_socket, IPPROTO_IP, IP_FW3, op3, + (socklen_t *)optlen); + + if (error == -1) { + if (errno != 0) + error = errno; + } + + return (error); } /** @@ -494,7 +642,38 @@ match_token(struct _s_x *table, char *string) for (pt = table ; i && pt->s != NULL ; pt++) if (strlen(pt->s) == i && !bcmp(string, pt->s, i)) return pt->x; - return -1; + return (-1); +} + +/** + * match_token takes a table and a string, returns the value associated + * with the string for the best match. + * + * Returns: + * value from @table for matched records + * -1 for non-matched records + * -2 if more than one records match @string. + */ +int +match_token_relaxed(struct _s_x *table, char *string) +{ + struct _s_x *pt, *m; + int i, c; + + i = strlen(string); + c = 0; + + for (pt = table ; i != 0 && pt->s != NULL ; pt++) { + if (strncmp(pt->s, string, i) != 0) + continue; + m = pt; + c++; + } + + if (c == 1) + return (m->x); + + return (c > 0 ? -2: -1); } /** @@ -510,6 +689,73 @@ match_value(struct _s_x *p, int value) return NULL; } +size_t +concat_tokens(char *buf, size_t bufsize, struct _s_x *table, char *delimiter) +{ + struct _s_x *pt; + int l; + size_t sz; + + for (sz = 0, pt = table ; pt->s != NULL; pt++) { + l = snprintf(buf + sz, bufsize - sz, "%s%s", + (sz == 0) ? "" : delimiter, pt->s); + sz += l; + bufsize += l; + if (sz > bufsize) + return (bufsize); + } + + return (sz); +} + +/* + * helper function to process a set of flags and set bits in the + * appropriate masks. 
+ */ +void +fill_flags(struct _s_x *flags, char *p, uint8_t *set, uint8_t *clear) +{ + char *q; /* points to the separator */ + int val; + uint8_t *which; /* mask we are working on */ + + while (p && *p) { + if (*p == '!') { + p++; + which = clear; + } else + which = set; + q = strchr(p, ','); + if (q) + *q++ = '\0'; + val = match_token(flags, p); + if (val <= 0) + errx(EX_DATAERR, "invalid flag %s", p); + *which |= (uint8_t)val; + p = q; + } +} + +void +print_flags_buffer(char *buf, size_t sz, struct _s_x *list, uint8_t set) +{ + char const *comma = ""; + int i, l; + + for (i = 0; list[i].x != 0; i++) { + if ((set & list[i].x) == 0) + continue; + + set &= ~list[i].x; + l = snprintf(buf, sz, "%s%s", comma, list[i].s); + if (l >= sz) + return; + comma = ","; + buf += l; + sz -=l; + } +} + /* * _substrcmp takes two strings and returns 1 if they do not match, * and 0 if they match exactly or the first string is a sub-string @@ -564,16 +810,16 @@ _substrcmp2(const char *str1, const char* str2, const char* str3) * prints one port, symbolic or numeric */ static void -print_port(int proto, uint16_t port) +print_port(struct buf_pr *bp, int proto, uint16_t port) { if (proto == IPPROTO_ETHERTYPE) { char const *s; if (co.do_resolv && (s = match_value(ether_types, port)) ) - printf("%s", s); + bprintf(bp, "%s", s); else - printf("0x%04x", port); + bprintf(bp, "0x%04x", port); } else { struct servent *se = NULL; if (co.do_resolv) { @@ -582,9 +828,9 @@ print_port(int proto, uint16_t port) se = getservbyport(htons(port), pe ? pe->p_name : NULL); } if (se) - printf("%s", se->s_name); + bprintf(bp, "%s", se->s_name); else - printf("%d", port); + bprintf(bp, "%d", port); } } @@ -606,7 +852,7 @@ static struct _s_x _port_name[] = { * XXX todo: add support for mask. 
*/ static void -print_newports(ipfw_insn_u16 *cmd, int proto, int opcode) +print_newports(struct buf_pr *bp, ipfw_insn_u16 *cmd, int proto, int opcode) { uint16_t *p = cmd->ports; int i; @@ -616,15 +862,15 @@ print_newports(ipfw_insn_u16 *cmd, int proto, int opcode) sep = match_value(_port_name, opcode); if (sep == NULL) sep = "???"; - printf (" %s", sep); + bprintf(bp, " %s", sep); } sep = " "; for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { - printf("%s", sep); - print_port(proto, p[0]); + bprintf(bp, "%s", sep); + print_port(bp, proto, p[0]); if (p[0] != p[1]) { - printf("-"); - print_port(proto, p[1]); + bprintf(bp, "-"); + print_port(bp, proto, p[1]); } sep = ","; } @@ -824,14 +1070,14 @@ fill_reject_code(u_short *codep, char *str) } static void -print_reject_code(uint16_t code) +print_reject_code(struct buf_pr *bp, uint16_t code) { - char const *s = match_value(icmpcodes, code); + char const *s; - if (s != NULL) - printf("unreach %s", s); + if ((s = match_value(icmpcodes, code)) != NULL) + bprintf(bp, "unreach %s", s); else - printf("unreach %u", code); + bprintf(bp, "unreach %u", code); } /* @@ -864,7 +1110,8 @@ contigmask(uint8_t *p, int len) * There is a specialized check for f_tcpflags. 
*/ static void -print_flags(char const *name, ipfw_insn *cmd, struct _s_x *list) +print_flags(struct buf_pr *bp, char const *name, ipfw_insn *cmd, + struct _s_x *list) { char const *comma = ""; int i; @@ -872,34 +1119,38 @@ print_flags(char const *name, ipfw_insn *cmd, struct _s_x *list) uint8_t clear = (cmd->arg1 >> 8) & 0xff; if (list == f_tcpflags && set == TH_SYN && clear == TH_ACK) { - printf(" setup"); + bprintf(bp, " setup"); return; } - printf(" %s ", name); + bprintf(bp, " %s ", name); for (i=0; list[i].x != 0; i++) { if (set & list[i].x) { set &= ~list[i].x; - printf("%s%s", comma, list[i].s); + bprintf(bp, "%s%s", comma, list[i].s); comma = ","; } if (clear & list[i].x) { clear &= ~list[i].x; - printf("%s!%s", comma, list[i].s); + bprintf(bp, "%s!%s", comma, list[i].s); comma = ","; } } } + /* * Print the ip address contained in a command. */ static void -print_ip(ipfw_insn_ip *cmd, char const *s) +print_ip(struct buf_pr *bp, struct format_opts *fo, ipfw_insn_ip *cmd, + char const *s) { struct hostent *he = NULL; + struct in_addr *ia; uint32_t len = F_LEN((ipfw_insn *)cmd); uint32_t *a = ((ipfw_insn_u32 *)cmd)->d; + char *t; if (cmd->o.opcode == O_IP_DST_LOOKUP && len > F_INSN_SIZE(ipfw_insn_u32)) { uint32_t d = a[1]; @@ -907,22 +1158,24 @@ print_ip(ipfw_insn_ip *cmd, char const *s) if (d < sizeof(lookup_key)/sizeof(lookup_key[0])) arg = match_value(rule_options, lookup_key[d]); - printf("%s lookup %s %d", cmd->o.len & F_NOT ? " not": "", - arg, cmd->o.arg1); + t = table_search_ctlv(fo->tstate, ((ipfw_insn *)cmd)->arg1); + bprintf(bp, "%s lookup %s %s", cmd->o.len & F_NOT ? " not": "", + arg, t); return; } - printf("%s%s ", cmd->o.len & F_NOT ? " not": "", s); + bprintf(bp, "%s%s ", cmd->o.len & F_NOT ? 
" not": "", s); if (cmd->o.opcode == O_IP_SRC_ME || cmd->o.opcode == O_IP_DST_ME) { - printf("me"); + bprintf(bp, "me"); return; } if (cmd->o.opcode == O_IP_SRC_LOOKUP || cmd->o.opcode == O_IP_DST_LOOKUP) { - printf("table(%u", ((ipfw_insn *)cmd)->arg1); + t = table_search_ctlv(fo->tstate, ((ipfw_insn *)cmd)->arg1); + bprintf(bp, "table(%s", t); if (len == F_INSN_SIZE(ipfw_insn_u32)) - printf(",%u", *a); - printf(")"); + bprintf(bp, ",%u", *a); + bprintf(bp, ")"); return; } if (cmd->o.opcode == O_IP_SRC_SET || cmd->o.opcode == O_IP_DST_SET) { @@ -933,7 +1186,7 @@ print_ip(ipfw_insn_ip *cmd, char const *s) x = cmd->o.arg1 - 1; x = htonl( ~x ); cmd->addr.s_addr = htonl(cmd->addr.s_addr); - printf("%s/%d", inet_ntoa(cmd->addr), + bprintf(bp, "%s/%d", inet_ntoa(cmd->addr), contigmask((uint8_t *)&x, 32)); x = cmd->addr.s_addr = htonl(cmd->addr.s_addr); x &= 0xff; /* base */ @@ -948,14 +1201,14 @@ print_ip(ipfw_insn_ip *cmd, char const *s) for (j=i+1; j < cmd->o.arg1; j++) if (!(map[ j/32] & (1<<(j & 31)))) break; - printf("%c%d", comma, i+x); + bprintf(bp, "%c%d", comma, i+x); if (j>i+2) { /* range has at least 3 elements */ - printf("-%d", j-1+x); + bprintf(bp, "-%d", j-1+x); i = j-1; } comma = ','; } - printf("}"); + bprintf(bp, "}"); return; } /* @@ -970,18 +1223,19 @@ print_ip(ipfw_insn_ip *cmd, char const *s) if (mb == 32 && co.do_resolv) he = gethostbyaddr((char *)&(a[0]), sizeof(u_long), AF_INET); if (he != NULL) /* resolved to name */ - printf("%s", he->h_name); + bprintf(bp, "%s", he->h_name); else if (mb == 0) /* any */ - printf("any"); + bprintf(bp, "any"); else { /* numeric IP followed by some kind of mask */ - printf("%s", inet_ntoa( *((struct in_addr *)&a[0]) ) ); + ia = (struct in_addr *)&a[0]; + bprintf(bp, "%s", inet_ntoa(*ia)); if (mb < 0) - printf(":%s", inet_ntoa( *((struct in_addr *)&a[1]) ) ); + bprintf(bp, ":%s", inet_ntoa(*ia ) ); else if (mb < 32) - printf("/%d", mb); + bprintf(bp, "/%d", mb); } if (len > 1) - printf(","); + bprintf(bp, ","); } 
} @@ -989,21 +1243,21 @@ print_ip(ipfw_insn_ip *cmd, char const *s) * prints a MAC address/mask pair */ static void -print_mac(uint8_t *addr, uint8_t *mask) +print_mac(struct buf_pr *bp, uint8_t *addr, uint8_t *mask) { int l = contigmask(mask, 48); if (l == 0) - printf(" any"); + bprintf(bp, " any"); else { - printf(" %02x:%02x:%02x:%02x:%02x:%02x", + bprintf(bp, " %02x:%02x:%02x:%02x:%02x:%02x", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); if (l == -1) - printf("&%02x:%02x:%02x:%02x:%02x:%02x", + bprintf(bp, "&%02x:%02x:%02x:%02x:%02x:%02x", mask[0], mask[1], mask[2], mask[3], mask[4], mask[5]); else if (l < 48) - printf("/%d", l); + bprintf(bp, "/%d", l); } } @@ -1032,38 +1286,38 @@ fill_icmptypes(ipfw_insn_u32 *cmd, char *av) } static void -print_icmptypes(ipfw_insn_u32 *cmd) +print_icmptypes(struct buf_pr *bp, ipfw_insn_u32 *cmd) { int i; char sep= ' '; - printf(" icmptypes"); + bprintf(bp, " icmptypes"); for (i = 0; i < 32; i++) { if ( (cmd->d[0] & (1 << (i))) == 0) continue; - printf("%c%d", sep, i); + bprintf(bp, "%c%d", sep, i); sep = ','; } } static void -print_dscp(ipfw_insn_u32 *cmd) +print_dscp(struct buf_pr *bp, ipfw_insn_u32 *cmd) { int i, c; uint32_t *v; char sep= ' '; const char *code; - printf(" dscp"); + bprintf(bp, " dscp"); i = 0; c = 0; v = cmd->d; while (i < 64) { if (*v & (1 << i)) { if ((code = match_value(f_ipdscp, i)) != NULL) - printf("%c%s", sep, code); + bprintf(bp, "%c%s", sep, code); else - printf("%c%d", sep, i); + bprintf(bp, "%c%d", sep, i); sep = ','; } @@ -1094,7 +1348,7 @@ print_dscp(ipfw_insn_u32 *cmd) #define HAVE_OPTIONS 0x8000 static void -show_prerequisites(int *flags, int want, int cmd) +show_prerequisites(struct buf_pr *bp, int *flags, int want, int cmd) { (void)cmd; /* UNUSED */ if (co.comment_only) @@ -1105,22 +1359,23 @@ show_prerequisites(int *flags, int want, int cmd) if ( !(*flags & HAVE_OPTIONS)) { if ( !(*flags & HAVE_PROTO) && (want & HAVE_PROTO)) { if ( (*flags & HAVE_PROTO4)) - printf(" ip4"); + 
bprintf(bp, " ip4"); else if ( (*flags & HAVE_PROTO6)) - printf(" ip6"); + bprintf(bp, " ip6"); else - printf(" ip"); + bprintf(bp, " ip"); } if ( !(*flags & HAVE_SRCIP) && (want & HAVE_SRCIP)) - printf(" from any"); + bprintf(bp, " from any"); if ( !(*flags & HAVE_DSTIP) && (want & HAVE_DSTIP)) - printf(" to any"); + bprintf(bp, " to any"); } *flags |= want; } static void -show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) +show_static_rule(struct cmdline_opts *co, struct format_opts *fo, + struct buf_pr *bp, struct ip_fw_rule *rule, struct ip_fw_bcounter *cntr) { static int twidth = 0; int l; @@ -1131,26 +1386,26 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) ipfw_insn_log *logptr = NULL; /* set if we find an O_LOG */ ipfw_insn_altq *altqptr = NULL; /* set if we find an O_ALTQ */ int or_block = 0; /* we are in an or block */ - uint32_t set_disable; - - bcopy(&rule->next_rule, &set_disable, sizeof(set_disable)); + uint32_t uval; - if (set_disable & (1 << rule->set)) { /* disabled */ - if (!co.show_sets) + if ((fo->set_mask & (1 << rule->set)) == 0) { + /* disabled mask */ + if (!co->show_sets) return; else - printf("# DISABLED "); + bprintf(bp, "# DISABLED "); } - printf("%05u ", rule->rulenum); + bprintf(bp, "%05u ", rule->rulenum); - if (pcwidth > 0 || bcwidth > 0) { - pr_u64(&rule->pcnt, pcwidth); - pr_u64(&rule->bcnt, bcwidth); + /* Print counters if enabled */ + if (fo->pcwidth > 0 || fo->bcwidth > 0) { + pr_u64(bp, &cntr->pcnt, fo->pcwidth); + pr_u64(bp, &cntr->bcnt, fo->bcwidth); } - if (co.do_time == 2) - printf("%10u ", rule->timestamp); - else if (co.do_time == 1) { + if (co->do_time == 2) + bprintf(bp, "%10u ", cntr->timestamp); + else if (co->do_time == 1) { char timestr[30]; time_t t = (time_t)0; @@ -1159,19 +1414,19 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) *strchr(timestr, '\n') = '\0'; twidth = strlen(timestr); } - if (rule->timestamp) { - t = _long_to_time(rule->timestamp); + if (cntr->timestamp > 0) { + t = 
_long_to_time(cntr->timestamp); strcpy(timestr, ctime(&t)); *strchr(timestr, '\n') = '\0'; - printf("%s ", timestr); + bprintf(bp, "%s ", timestr); } else { - printf("%*s", twidth, " "); + bprintf(bp, "%*s", twidth, " "); } } - if (co.show_sets) - printf("set %d ", rule->set); + if (co->show_sets) + bprintf(bp, "set %d ", rule->set); /* * print the optional "match probability" @@ -1183,7 +1438,7 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) double d = 1.0 * p->d[0]; d = (d / 0x7fffffff); - printf("prob %f ", d); + bprintf(bp, "prob %f ", d); } } @@ -1194,66 +1449,66 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) l > 0 ; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { switch(cmd->opcode) { case O_CHECK_STATE: - printf("check-state"); + bprintf(bp, "check-state"); /* avoid printing anything else */ flags = HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP | HAVE_IP; break; case O_ACCEPT: - printf("allow"); + bprintf(bp, "allow"); break; case O_COUNT: - printf("count"); + bprintf(bp, "count"); break; case O_DENY: - printf("deny"); + bprintf(bp, "deny"); break; case O_REJECT: if (cmd->arg1 == ICMP_REJECT_RST) - printf("reset"); + bprintf(bp, "reset"); else if (cmd->arg1 == ICMP_UNREACH_HOST) - printf("reject"); + bprintf(bp, "reject"); else - print_reject_code(cmd->arg1); + print_reject_code(bp, cmd->arg1); break; case O_UNREACH6: if (cmd->arg1 == ICMP6_UNREACH_RST) - printf("reset6"); + bprintf(bp, "reset6"); else print_unreach6_code(cmd->arg1); break; case O_SKIPTO: - PRINT_UINT_ARG("skipto ", cmd->arg1); + bprint_uint_arg(bp, "skipto ", cmd->arg1); break; case O_PIPE: - PRINT_UINT_ARG("pipe ", cmd->arg1); + bprint_uint_arg(bp, "pipe ", cmd->arg1); break; case O_QUEUE: - PRINT_UINT_ARG("queue ", cmd->arg1); + bprint_uint_arg(bp, "queue ", cmd->arg1); break; case O_DIVERT: - PRINT_UINT_ARG("divert ", cmd->arg1); + bprint_uint_arg(bp, "divert ", cmd->arg1); break; case O_TEE: - PRINT_UINT_ARG("tee ", cmd->arg1); + bprint_uint_arg(bp, "tee ", cmd->arg1); break; case 
O_NETGRAPH: - PRINT_UINT_ARG("netgraph ", cmd->arg1); + bprint_uint_arg(bp, "netgraph ", cmd->arg1); break; case O_NGTEE: - PRINT_UINT_ARG("ngtee ", cmd->arg1); + bprint_uint_arg(bp, "ngtee ", cmd->arg1); break; case O_FORWARD_IP: @@ -1261,12 +1516,12 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) ipfw_insn_sa *s = (ipfw_insn_sa *)cmd; if (s->sa.sin_addr.s_addr == INADDR_ANY) { - printf("fwd tablearg"); + bprintf(bp, "fwd tablearg"); } else { - printf("fwd %s", inet_ntoa(s->sa.sin_addr)); + bprintf(bp, "fwd %s",inet_ntoa(s->sa.sin_addr)); } if (s->sa.sin_port) - printf(",%d", s->sa.sin_port); + bprintf(bp, ",%d", s->sa.sin_port); } break; @@ -1275,10 +1530,10 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) char buf[4 + INET6_ADDRSTRLEN + 1]; ipfw_insn_sa6 *s = (ipfw_insn_sa6 *)cmd; - printf("fwd %s", inet_ntop(AF_INET6, &s->sa.sin6_addr, - buf, sizeof(buf))); + bprintf(bp, "fwd %s", inet_ntop(AF_INET6, + &s->sa.sin6_addr, buf, sizeof(buf))); if (s->sa.sin6_port) - printf(",%d", s->sa.sin6_port); + bprintf(bp, ",%d", s->sa.sin6_port); } break; @@ -1296,64 +1551,69 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) case O_NAT: if (cmd->arg1 != 0) - PRINT_UINT_ARG("nat ", cmd->arg1); + bprint_uint_arg(bp, "nat ", cmd->arg1); else - printf("nat global"); + bprintf(bp, "nat global"); break; case O_SETFIB: - PRINT_UINT_ARG("setfib ", cmd->arg1); + bprint_uint_arg(bp, "setfib ", cmd->arg1 & 0x7FFF); break; case O_SETDSCP: { const char *code; - if ((code = match_value(f_ipdscp, cmd->arg1)) != NULL) - printf("setdscp %s", code); + if (cmd->arg1 == IP_FW_TARG) { + bprint_uint_arg(bp, "setdscp ", cmd->arg1); + break; + } + uval = cmd->arg1 & 0x3F; + if ((code = match_value(f_ipdscp, uval)) != NULL) + bprintf(bp, "setdscp %s", code); else - PRINT_UINT_ARG("setdscp ", cmd->arg1); + bprint_uint_arg(bp, "setdscp ", uval); } break; case O_REASS: - printf("reass"); + bprintf(bp, "reass"); break; case O_CALLRETURN: if (cmd->len & F_NOT) - 
printf("return"); + bprintf(bp, "return"); else - PRINT_UINT_ARG("call ", cmd->arg1); + bprint_uint_arg(bp, "call ", cmd->arg1); break; default: - printf("** unrecognized action %d len %d ", + bprintf(bp, "** unrecognized action %d len %d ", cmd->opcode, cmd->len); } } if (logptr) { if (logptr->max_log > 0) - printf(" log logamount %d", logptr->max_log); + bprintf(bp, " log logamount %d", logptr->max_log); else - printf(" log"); + bprintf(bp, " log"); } #ifndef NO_ALTQ if (altqptr) { - print_altq_cmd(altqptr); + print_altq_cmd(bp, altqptr); } #endif if (tagptr) { if (tagptr->len & F_NOT) - PRINT_UINT_ARG(" untag ", tagptr->arg1); + bprint_uint_arg(bp, " untag ", tagptr->arg1); else - PRINT_UINT_ARG(" tag ", tagptr->arg1); + bprint_uint_arg(bp, " tag ", tagptr->arg1); } /* * then print the body. */ - for (l = rule->act_ofs, cmd = rule->cmd ; + for (l = rule->act_ofs, cmd = rule->cmd; l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { if ((cmd->len & F_OR) || (cmd->len & F_NOT)) continue; @@ -1365,31 +1625,31 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) break; } } - if (rule->_pad & 1) { /* empty rules before options */ - if (!co.do_compact) { - show_prerequisites(&flags, HAVE_PROTO, 0); - printf(" from any to any"); + if (rule->flags & IPFW_RULE_NOOPT) { /* empty rules before options */ + if (!co->do_compact) { + show_prerequisites(bp, &flags, HAVE_PROTO, 0); + bprintf(bp, " from any to any"); } flags |= HAVE_IP | HAVE_OPTIONS | HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP; } - if (co.comment_only) + if (co->comment_only) comment = "..."; - for (l = rule->act_ofs, cmd = rule->cmd ; + for (l = rule->act_ofs, cmd = rule->cmd; l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { /* useful alias */ ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; - if (co.comment_only) { + if (co->comment_only) { if (cmd->opcode != O_NOP) continue; - printf(" // %s\n", (char *)(cmd + 1)); + bprintf(bp, " // %s\n", (char *)(cmd + 1)); return; } - show_prerequisites(&flags, 0, cmd->opcode); + 
show_prerequisites(bp, &flags, 0, cmd->opcode); switch(cmd->opcode) { case O_PROB: @@ -1403,12 +1663,12 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) case O_IP_SRC_MASK: case O_IP_SRC_ME: case O_IP_SRC_SET: - show_prerequisites(&flags, HAVE_PROTO, 0); + show_prerequisites(bp, &flags, HAVE_PROTO, 0); if (!(flags & HAVE_SRCIP)) - printf(" from"); + bprintf(bp, " from"); if ((cmd->len & F_OR) && !or_block) - printf(" {"); - print_ip((ipfw_insn_ip *)cmd, + bprintf(bp, " {"); + print_ip(bp, fo, (ipfw_insn_ip *)cmd, (flags & HAVE_OPTIONS) ? " src-ip" : ""); flags |= HAVE_SRCIP; break; @@ -1418,12 +1678,12 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) case O_IP_DST_MASK: case O_IP_DST_ME: case O_IP_DST_SET: - show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); + show_prerequisites(bp, &flags, HAVE_PROTO|HAVE_SRCIP, 0); if (!(flags & HAVE_DSTIP)) - printf(" to"); + bprintf(bp, " to"); if ((cmd->len & F_OR) && !or_block) - printf(" {"); - print_ip((ipfw_insn_ip *)cmd, + bprintf(bp, " {"); + print_ip(bp, fo, (ipfw_insn_ip *)cmd, (flags & HAVE_OPTIONS) ? " dst-ip" : ""); flags |= HAVE_DSTIP; break; @@ -1431,12 +1691,12 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) case O_IP6_SRC: case O_IP6_SRC_MASK: case O_IP6_SRC_ME: - show_prerequisites(&flags, HAVE_PROTO, 0); + show_prerequisites(bp, &flags, HAVE_PROTO, 0); if (!(flags & HAVE_SRCIP)) - printf(" from"); + bprintf(bp, " from"); if ((cmd->len & F_OR) && !or_block) - printf(" {"); - print_ip6((ipfw_insn_ip6 *)cmd, + bprintf(bp, " {"); + print_ip6(bp, (ipfw_insn_ip6 *)cmd, (flags & HAVE_OPTIONS) ? 
" src-ip6" : ""); flags |= HAVE_SRCIP | HAVE_PROTO; break; @@ -1444,35 +1704,35 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) case O_IP6_DST: case O_IP6_DST_MASK: case O_IP6_DST_ME: - show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); + show_prerequisites(bp, &flags, HAVE_PROTO|HAVE_SRCIP, 0); if (!(flags & HAVE_DSTIP)) - printf(" to"); + bprintf(bp, " to"); if ((cmd->len & F_OR) && !or_block) - printf(" {"); - print_ip6((ipfw_insn_ip6 *)cmd, + bprintf(bp, " {"); + print_ip6(bp, (ipfw_insn_ip6 *)cmd, (flags & HAVE_OPTIONS) ? " dst-ip6" : ""); flags |= HAVE_DSTIP; break; case O_FLOW6ID: - print_flow6id( (ipfw_insn_u32 *) cmd ); - flags |= HAVE_OPTIONS; - break; + print_flow6id(bp, (ipfw_insn_u32 *) cmd ); + flags |= HAVE_OPTIONS; + break; case O_IP_DSTPORT: - show_prerequisites(&flags, + show_prerequisites(bp, &flags, HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP | HAVE_IP, 0); case O_IP_SRCPORT: if (flags & HAVE_DSTIP) flags |= HAVE_IP; - show_prerequisites(&flags, + show_prerequisites(bp, &flags, HAVE_PROTO | HAVE_SRCIP, 0); if ((cmd->len & F_OR) && !or_block) - printf(" {"); + bprintf(bp, " {"); if (cmd->len & F_NOT) - printf(" not"); - print_newports((ipfw_insn_u16 *)cmd, proto, + bprintf(bp, " not"); + print_newports(bp, (ipfw_insn_u16 *)cmd, proto, (flags & HAVE_OPTIONS) ? 
cmd->opcode : 0); break; @@ -1480,22 +1740,22 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) struct protoent *pe = NULL; if ((cmd->len & F_OR) && !or_block) - printf(" {"); + bprintf(bp, " {"); if (cmd->len & F_NOT) - printf(" not"); + bprintf(bp, " not"); proto = cmd->arg1; pe = getprotobynumber(cmd->arg1); if ((flags & (HAVE_PROTO4 | HAVE_PROTO6)) && !(flags & HAVE_PROTO)) - show_prerequisites(&flags, + show_prerequisites(bp, &flags, HAVE_PROTO | HAVE_IP | HAVE_SRCIP | HAVE_DSTIP | HAVE_OPTIONS, 0); if (flags & HAVE_OPTIONS) - printf(" proto"); + bprintf(bp, " proto"); if (pe) - printf(" %s", pe->p_name); + bprintf(bp, " %s", pe->p_name); else - printf(" %u", cmd->arg1); + bprintf(bp, " %u", cmd->arg1); } flags |= HAVE_PROTO; break; @@ -1507,68 +1767,68 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) ((cmd->opcode == O_IP4) && (flags & HAVE_PROTO4))) break; - show_prerequisites(&flags, HAVE_PROTO | HAVE_SRCIP | + show_prerequisites(bp, &flags, HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP | HAVE_IP | HAVE_OPTIONS, 0); if ((cmd->len & F_OR) && !or_block) - printf(" {"); + bprintf(bp, " {"); if (cmd->len & F_NOT && cmd->opcode != O_IN) - printf(" not"); + bprintf(bp, " not"); switch(cmd->opcode) { case O_MACADDR2: { ipfw_insn_mac *m = (ipfw_insn_mac *)cmd; - printf(" MAC"); - print_mac(m->addr, m->mask); - print_mac(m->addr + 6, m->mask + 6); + bprintf(bp, " MAC"); + print_mac(bp, m->addr, m->mask); + print_mac(bp, m->addr + 6, m->mask + 6); } break; case O_MAC_TYPE: - print_newports((ipfw_insn_u16 *)cmd, + print_newports(bp, (ipfw_insn_u16 *)cmd, IPPROTO_ETHERTYPE, cmd->opcode); break; case O_FRAG: - printf(" frag"); + bprintf(bp, " frag"); break; case O_FIB: - printf(" fib %u", cmd->arg1 ); + bprintf(bp, " fib %u", cmd->arg1 ); break; case O_SOCKARG: - printf(" sockarg"); + bprintf(bp, " sockarg"); break; case O_IN: - printf(cmd->len & F_NOT ? " out" : " in"); + bprintf(bp, cmd->len & F_NOT ? 
" out" : " in"); break; case O_DIVERTED: switch (cmd->arg1) { case 3: - printf(" diverted"); + bprintf(bp, " diverted"); break; case 1: - printf(" diverted-loopback"); + bprintf(bp, " diverted-loopback"); break; case 2: - printf(" diverted-output"); + bprintf(bp, " diverted-output"); break; default: - printf(" diverted-?<%u>", cmd->arg1); + bprintf(bp, " diverted-?<%u>", cmd->arg1); break; } break; case O_LAYER2: - printf(" layer2"); + bprintf(bp, " layer2"); break; case O_XMIT: case O_RECV: case O_VIA: { - char const *s; + char const *s, *t; ipfw_insn_if *cmdif = (ipfw_insn_if *)cmd; if (cmd->opcode == O_XMIT) @@ -1578,97 +1838,112 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) else /* if (cmd->opcode == O_VIA) */ s = "via"; if (cmdif->name[0] == '\0') - printf(" %s %s", s, + bprintf(bp, " %s %s", s, inet_ntoa(cmdif->p.ip)); - else if (cmdif->name[0] == '\1') /* interface table */ - printf(" %s table(%d)", s, cmdif->p.glob); - else - printf(" %s %s", s, cmdif->name); + else if (cmdif->name[0] == '\1') { + /* interface table */ + t = table_search_ctlv(fo->tstate, + cmdif->p.kidx); + bprintf(bp, " %s table(%s)", s, t); + } else + bprintf(bp, " %s %s", s, cmdif->name); break; } + case O_IP_FLOW_LOOKUP: + { + char *t; + + t = table_search_ctlv(fo->tstate, cmd->arg1); + bprintf(bp, " flow table(%s", t); + if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) + bprintf(bp, ",%u", + ((ipfw_insn_u32 *)cmd)->d[0]); + bprintf(bp, ")"); + break; + } case O_IPID: if (F_LEN(cmd) == 1) - printf(" ipid %u", cmd->arg1 ); + bprintf(bp, " ipid %u", cmd->arg1 ); else - print_newports((ipfw_insn_u16 *)cmd, 0, + print_newports(bp, (ipfw_insn_u16 *)cmd, 0, O_IPID); break; case O_IPTTL: if (F_LEN(cmd) == 1) - printf(" ipttl %u", cmd->arg1 ); + bprintf(bp, " ipttl %u", cmd->arg1 ); else - print_newports((ipfw_insn_u16 *)cmd, 0, + print_newports(bp, (ipfw_insn_u16 *)cmd, 0, O_IPTTL); break; case O_IPVER: - printf(" ipver %u", cmd->arg1 ); + bprintf(bp, " ipver %u", cmd->arg1 ); break; 
case O_IPPRECEDENCE: - printf(" ipprecedence %u", (cmd->arg1) >> 5 ); + bprintf(bp, " ipprecedence %u", cmd->arg1 >> 5); break; case O_DSCP: - print_dscp((ipfw_insn_u32 *)cmd); + print_dscp(bp, (ipfw_insn_u32 *)cmd); break; case O_IPLEN: if (F_LEN(cmd) == 1) - printf(" iplen %u", cmd->arg1 ); + bprintf(bp, " iplen %u", cmd->arg1 ); else - print_newports((ipfw_insn_u16 *)cmd, 0, + print_newports(bp, (ipfw_insn_u16 *)cmd, 0, O_IPLEN); break; case O_IPOPT: - print_flags("ipoptions", cmd, f_ipopts); + print_flags(bp, "ipoptions", cmd, f_ipopts); break; case O_IPTOS: - print_flags("iptos", cmd, f_iptos); + print_flags(bp, "iptos", cmd, f_iptos); break; case O_ICMPTYPE: - print_icmptypes((ipfw_insn_u32 *)cmd); + print_icmptypes(bp, (ipfw_insn_u32 *)cmd); break; case O_ESTAB: - printf(" established"); + bprintf(bp, " established"); break; case O_TCPDATALEN: if (F_LEN(cmd) == 1) - printf(" tcpdatalen %u", cmd->arg1 ); + bprintf(bp, " tcpdatalen %u", cmd->arg1 ); else - print_newports((ipfw_insn_u16 *)cmd, 0, + print_newports(bp, (ipfw_insn_u16 *)cmd, 0, O_TCPDATALEN); break; case O_TCPFLAGS: - print_flags("tcpflags", cmd, f_tcpflags); + print_flags(bp, "tcpflags", cmd, f_tcpflags); break; case O_TCPOPTS: - print_flags("tcpoptions", cmd, f_tcpopts); + print_flags(bp, "tcpoptions", cmd, f_tcpopts); break; case O_TCPWIN: if (F_LEN(cmd) == 1) - printf(" tcpwin %u", cmd->arg1); + bprintf(bp, " tcpwin %u", cmd->arg1); else - print_newports((ipfw_insn_u16 *)cmd, 0, + print_newports(bp, (ipfw_insn_u16 *)cmd, 0, O_TCPWIN); break; case O_TCPACK: - printf(" tcpack %d", ntohl(cmd32->d[0])); + bprintf(bp, " tcpack %d", ntohl(cmd32->d[0])); break; case O_TCPSEQ: - printf(" tcpseq %d", ntohl(cmd32->d[0])); + bprintf(bp, " tcpseq %d", ntohl(cmd32->d[0])); break; case O_UID: @@ -1676,9 +1951,9 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) struct passwd *pwd = getpwuid(cmd32->d[0]); if (pwd) - printf(" uid %s", pwd->pw_name); + bprintf(bp, " uid %s", pwd->pw_name); else - 
printf(" uid %u", cmd32->d[0]); + bprintf(bp, " uid %u", cmd32->d[0]); } break; @@ -1687,30 +1962,30 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) struct group *grp = getgrgid(cmd32->d[0]); if (grp) - printf(" gid %s", grp->gr_name); + bprintf(bp, " gid %s", grp->gr_name); else - printf(" gid %u", cmd32->d[0]); + bprintf(bp, " gid %u", cmd32->d[0]); } break; case O_JAIL: - printf(" jail %d", cmd32->d[0]); + bprintf(bp, " jail %d", cmd32->d[0]); break; case O_VERREVPATH: - printf(" verrevpath"); + bprintf(bp, " verrevpath"); break; case O_VERSRCREACH: - printf(" versrcreach"); + bprintf(bp, " versrcreach"); break; case O_ANTISPOOF: - printf(" antispoof"); + bprintf(bp, " antispoof"); break; case O_IPSEC: - printf(" ipsec"); + bprintf(bp, " ipsec"); break; case O_NOP: @@ -1718,7 +1993,7 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) break; case O_KEEP_STATE: - printf(" keep-state"); + bprintf(bp, " keep-state"); break; case O_LIMIT: { @@ -1727,113 +2002,126 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) uint8_t x = c->limit_mask; char const *comma = " "; - printf(" limit"); + bprintf(bp, " limit"); for (; p->x != 0 ; p++) if ((x & p->x) == p->x) { x &= ~p->x; - printf("%s%s", comma, p->s); + bprintf(bp, "%s%s", comma,p->s); comma = ","; } - PRINT_UINT_ARG(" ", c->conn_limit); + bprint_uint_arg(bp, " ", c->conn_limit); break; } case O_IP6: - printf(" ip6"); + bprintf(bp, " ip6"); break; case O_IP4: - printf(" ip4"); + bprintf(bp, " ip4"); break; case O_ICMP6TYPE: - print_icmp6types((ipfw_insn_u32 *)cmd); + print_icmp6types(bp, (ipfw_insn_u32 *)cmd); break; case O_EXT_HDR: - print_ext6hdr( (ipfw_insn *) cmd ); + print_ext6hdr(bp, (ipfw_insn *)cmd); break; case O_TAGGED: if (F_LEN(cmd) == 1) - PRINT_UINT_ARG(" tagged ", cmd->arg1); + bprint_uint_arg(bp, " tagged ", + cmd->arg1); else - print_newports((ipfw_insn_u16 *)cmd, 0, - O_TAGGED); + print_newports(bp, (ipfw_insn_u16 *)cmd, + 0, O_TAGGED); break; default: - printf(" [opcode %d 
len %d]", + bprintf(bp, " [opcode %d len %d]", cmd->opcode, cmd->len); } } if (cmd->len & F_OR) { - printf(" or"); + bprintf(bp, " or"); or_block = 1; } else if (or_block) { - printf(" }"); + bprintf(bp, " }"); or_block = 0; } } - show_prerequisites(&flags, HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP + show_prerequisites(bp, &flags, HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP | HAVE_IP, 0); if (comment) - printf(" // %s", comment); - printf("\n"); + bprintf(bp, " // %s", comment); + bprintf(bp, "\n"); } static void -show_dyn_ipfw(ipfw_dyn_rule *d, int pcwidth, int bcwidth) +show_dyn_state(struct cmdline_opts *co, struct format_opts *fo, + struct buf_pr *bp, ipfw_dyn_rule *d) { struct protoent *pe; struct in_addr a; uint16_t rulenum; char buf[INET6_ADDRSTRLEN]; - if (!co.do_expired) { + if (!co->do_expired) { if (!d->expire && !(d->dyn_type == O_LIMIT_PARENT)) return; } bcopy(&d->rule, &rulenum, sizeof(rulenum)); - printf("%05d", rulenum); - if (pcwidth > 0 || bcwidth > 0) { - printf(" "); - pr_u64(&d->pcnt, pcwidth); - pr_u64(&d->bcnt, bcwidth); - printf("(%ds)", d->expire); + bprintf(bp, "%05d", rulenum); + if (fo->pcwidth > 0 || fo->bcwidth > 0) { + bprintf(bp, " "); + pr_u64(bp, &d->pcnt, fo->pcwidth); + pr_u64(bp, &d->bcnt, fo->bcwidth); + bprintf(bp, "(%ds)", d->expire); } switch (d->dyn_type) { case O_LIMIT_PARENT: - printf(" PARENT %d", d->count); + bprintf(bp, " PARENT %d", d->count); break; case O_LIMIT: - printf(" LIMIT"); + bprintf(bp, " LIMIT"); break; case O_KEEP_STATE: /* bidir, no mask */ - printf(" STATE"); + bprintf(bp, " STATE"); break; } if ((pe = getprotobynumber(d->id.proto)) != NULL) - printf(" %s", pe->p_name); + bprintf(bp, " %s", pe->p_name); else - printf(" proto %u", d->id.proto); + bprintf(bp, " proto %u", d->id.proto); if (d->id.addr_type == 4) { a.s_addr = htonl(d->id.src_ip); - printf(" %s %d", inet_ntoa(a), d->id.src_port); + bprintf(bp, " %s %d", inet_ntoa(a), d->id.src_port); a.s_addr = htonl(d->id.dst_ip); - printf(" <-> %s %d", inet_ntoa(a), 
d->id.dst_port); + bprintf(bp, " <-> %s %d", inet_ntoa(a), d->id.dst_port); } else if (d->id.addr_type == 6) { - printf(" %s %d", inet_ntop(AF_INET6, &d->id.src_ip6, buf, + bprintf(bp, " %s %d", inet_ntop(AF_INET6, &d->id.src_ip6, buf, sizeof(buf)), d->id.src_port); - printf(" <-> %s %d", inet_ntop(AF_INET6, &d->id.dst_ip6, buf, - sizeof(buf)), d->id.dst_port); + bprintf(bp, " <-> %s %d", inet_ntop(AF_INET6, &d->id.dst_ip6, + buf, sizeof(buf)), d->id.dst_port); } else - printf(" UNKNOWN <-> UNKNOWN\n"); + bprintf(bp, " UNKNOWN <-> UNKNOWN\n"); +} + +static int +do_range_cmd(int cmd, ipfw_range_tlv *rt) +{ + ipfw_range_header rh; + + memset(&rh, 0, sizeof(rh)); + memcpy(&rh.range, rt, sizeof(*rt)); + rh.range.head.length = sizeof(*rt); + rh.range.head.type = IPFW_TLV_RANGE; - printf("\n"); + return (do_set3(cmd, &rh.opheader, sizeof(rh))); } /* @@ -1846,77 +2134,75 @@ show_dyn_ipfw(ipfw_dyn_rule *d, int pcwidth, int bcwidth) void ipfw_sets_handler(char *av[]) { - uint32_t set_disable, masks[2]; - int i, nbytes; - uint16_t rulenum; - uint8_t cmd, new_set; + uint32_t masks[2]; + int i; + uint8_t cmd, new_set, rulenum; + ipfw_range_tlv rt; + char *msg; + size_t size; av++; + memset(&rt, 0, sizeof(rt)); if (av[0] == NULL) errx(EX_USAGE, "set needs command"); if (_substrcmp(*av, "show") == 0) { - void *data = NULL; - char const *msg; - int nalloc; - - nalloc = nbytes = sizeof(struct ip_fw); - while (nbytes >= nalloc) { - if (data) - free(data); - nalloc = nalloc * 2 + 200; - nbytes = nalloc; - data = safe_calloc(1, nbytes); - if (do_cmd(IP_FW_GET, data, (uintptr_t)&nbytes) < 0) - err(EX_OSERR, "getsockopt(IP_FW_GET)"); - } + struct format_opts fo; + ipfw_cfg_lheader *cfg; - bcopy(&((struct ip_fw *)data)->next_rule, - &set_disable, sizeof(set_disable)); + memset(&fo, 0, sizeof(fo)); + if (ipfw_get_config(&co, &fo, &cfg, &size) != 0) + err(EX_OSERR, "requesting config failed"); - for (i = 0, msg = "disable" ; i < RESVD_SET; i++) - if ((set_disable & (1<<i))) { + for (i = 
0, msg = "disable"; i < RESVD_SET; i++) + if ((cfg->set_mask & (1<<i)) == 0) { printf("%s %d", msg, i); msg = ""; } - msg = (set_disable) ? " enable" : "enable"; + msg = (cfg->set_mask != (uint32_t)-1) ? " enable" : "enable"; for (i = 0; i < RESVD_SET; i++) - if (!(set_disable & (1<<i))) { + if ((cfg->set_mask & (1<<i)) != 0) { printf("%s %d", msg, i); msg = ""; } printf("\n"); + free(cfg); } else if (_substrcmp(*av, "swap") == 0) { av++; if ( av[0] == NULL || av[1] == NULL ) errx(EX_USAGE, "set swap needs 2 set numbers\n"); - rulenum = atoi(av[0]); - new_set = atoi(av[1]); - if (!isdigit(*(av[0])) || rulenum > RESVD_SET) + rt.set = atoi(av[0]); + rt.new_set = atoi(av[1]); + if (!isdigit(*(av[0])) || rt.set > RESVD_SET) errx(EX_DATAERR, "invalid set number %s\n", av[0]); - if (!isdigit(*(av[1])) || new_set > RESVD_SET) + if (!isdigit(*(av[1])) || rt.new_set > RESVD_SET) errx(EX_DATAERR, "invalid set number %s\n", av[1]); - masks[0] = (4 << 24) | (new_set << 16) | (rulenum); - i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t)); + i = do_range_cmd(IP_FW_SET_SWAP, &rt); } else if (_substrcmp(*av, "move") == 0) { av++; if (av[0] && _substrcmp(*av, "rule") == 0) { - cmd = 2; + rt.flags = IPFW_RCFLAG_RANGE; /* move rules to new set */ + cmd = IP_FW_XMOVE; av++; } else - cmd = 3; + cmd = IP_FW_SET_MOVE; /* Move set to new one */ if (av[0] == NULL || av[1] == NULL || av[2] == NULL || av[3] != NULL || _substrcmp(av[1], "to") != 0) errx(EX_USAGE, "syntax: set move [rule] X to Y\n"); rulenum = atoi(av[0]); - new_set = atoi(av[2]); - if (!isdigit(*(av[0])) || (cmd == 3 && rulenum > RESVD_SET) || - (cmd == 2 && rulenum == IPFW_DEFAULT_RULE) ) + rt.new_set = atoi(av[2]); + if (cmd == IP_FW_XMOVE) { + rt.start_rule = rulenum; + rt.end_rule = rulenum; + } else + rt.set = rulenum; + rt.new_set = atoi(av[2]); + if (!isdigit(*(av[0])) || (cmd == 3 && rt.set > RESVD_SET) || + (cmd == 2 && rt.start_rule == IPFW_DEFAULT_RULE) ) errx(EX_DATAERR, "invalid source number %s\n", av[0]); if 
(!isdigit(*(av[2])) || new_set > RESVD_SET) errx(EX_DATAERR, "invalid dest. set %s\n", av[1]); - masks[0] = (cmd << 24) | (new_set << 16) | (rulenum); - i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t)); + i = do_range_cmd(cmd, &rt); } else if (_substrcmp(*av, "disable") == 0 || _substrcmp(*av, "enable") == 0 ) { int which = _substrcmp(*av, "enable") == 0 ? 1 : 0; @@ -1944,9 +2230,11 @@ ipfw_sets_handler(char *av[]) errx(EX_DATAERR, "cannot enable and disable the same set\n"); - i = do_cmd(IP_FW_DEL, masks, sizeof(masks)); + rt.set = masks[0]; + rt.new_set = masks[1]; + i = do_range_cmd(IP_FW_SET_ENABLE, &rt); if (i) - warn("set enable/disable: setsockopt(IP_FW_DEL)"); + warn("set enable/disable: setsockopt(IP_FW_SET_ENABLE)"); } else errx(EX_USAGE, "invalid set command %s\n", *av); } @@ -1984,28 +2272,204 @@ ipfw_sysctl_handler(char *av[], int which) } } +typedef void state_cb(struct cmdline_opts *co, struct format_opts *fo, + void *arg, void *state); + +static void +prepare_format_dyn(struct cmdline_opts *co, struct format_opts *fo, + void *arg, void *_state) +{ + ipfw_dyn_rule *d; + int width; + uint8_t set; + + d = (ipfw_dyn_rule *)_state; + /* Count _ALL_ states */ + fo->dcnt++; + + if (fo->show_counters == 0) + return; + + if (co->use_set) { + /* skip states from another set */ + bcopy((char *)&d->rule + sizeof(uint16_t), &set, + sizeof(uint8_t)); + if (set != co->use_set - 1) + return; + } + + width = pr_u64(NULL, &d->pcnt, 0); + if (width > fo->pcwidth) + fo->pcwidth = width; + + width = pr_u64(NULL, &d->bcnt, 0); + if (width > fo->bcwidth) + fo->bcwidth = width; +} + +static int +foreach_state(struct cmdline_opts *co, struct format_opts *fo, + caddr_t base, size_t sz, state_cb dyn_bc, void *dyn_arg) +{ + int ttype; + state_cb *fptr; + void *farg; + ipfw_obj_tlv *tlv; + ipfw_obj_ctlv *ctlv; + + fptr = NULL; + ttype = 0; + + while (sz > 0) { + ctlv = (ipfw_obj_ctlv *)base; + switch (ctlv->head.type) { + case IPFW_TLV_DYNSTATE_LIST: + base += sizeof(*ctlv); + sz 
-= sizeof(*ctlv); + ttype = IPFW_TLV_DYN_ENT; + fptr = dyn_bc; + farg = dyn_arg; + break; + default: + return (sz); + } + + while (sz > 0) { + tlv = (ipfw_obj_tlv *)base; + if (tlv->type != ttype) + break; + + fptr(co, fo, farg, tlv + 1); + sz -= tlv->length; + base += tlv->length; + } + } + + return (sz); +} + +static void +prepare_format_opts(struct cmdline_opts *co, struct format_opts *fo, + ipfw_obj_tlv *rtlv, int rcnt, caddr_t dynbase, size_t dynsz) +{ + int bcwidth, pcwidth, width; + int n; + struct ip_fw_bcounter *cntr; + struct ip_fw_rule *r; + + bcwidth = 0; + pcwidth = 0; + if (fo->show_counters != 0) { + for (n = 0; n < rcnt; n++, + rtlv = (ipfw_obj_tlv *)((caddr_t)rtlv + rtlv->length)) { + cntr = (struct ip_fw_bcounter *)(rtlv + 1); + r = (struct ip_fw_rule *)((caddr_t)cntr + cntr->size); + /* skip rules from another set */ + if (co->use_set && r->set != co->use_set - 1) + continue; + + /* packet counter */ + width = pr_u64(NULL, &cntr->pcnt, 0); + if (width > pcwidth) + pcwidth = width; + + /* byte counter */ + width = pr_u64(NULL, &cntr->bcnt, 0); + if (width > bcwidth) + bcwidth = width; + } + } + fo->bcwidth = bcwidth; + fo->pcwidth = pcwidth; + + fo->dcnt = 0; + if (co->do_dynamic && dynsz > 0) + foreach_state(co, fo, dynbase, dynsz, prepare_format_dyn, NULL); +} + +static int +list_static_range(struct cmdline_opts *co, struct format_opts *fo, + struct buf_pr *bp, ipfw_obj_tlv *rtlv, int rcnt) +{ + int n, seen; + struct ip_fw_rule *r; + struct ip_fw_bcounter *cntr; + int c = 0; + + for (n = seen = 0; n < rcnt; n++, + rtlv = (ipfw_obj_tlv *)((caddr_t)rtlv + rtlv->length)) { + + if (fo->show_counters != 0) { + cntr = (struct ip_fw_bcounter *)(rtlv + 1); + r = (struct ip_fw_rule *)((caddr_t)cntr + cntr->size); + } else { + cntr = NULL; + r = (struct ip_fw_rule *)(rtlv + 1); + } + if (r->rulenum > fo->last) + break; + if (co->use_set && r->set != co->use_set - 1) + continue; + if (r->rulenum >= fo->first && r->rulenum <= fo->last) { + 
show_static_rule(co, fo, bp, r, cntr); + printf("%s", bp->buf); + c += rtlv->length; + bp_flush(bp); + seen++; + } + } + + return (seen); +} + +static void +list_dyn_state(struct cmdline_opts *co, struct format_opts *fo, + void *_arg, void *_state) +{ + uint16_t rulenum; + uint8_t set; + ipfw_dyn_rule *d; + struct buf_pr *bp; + + d = (ipfw_dyn_rule *)_state; + bp = (struct buf_pr *)_arg; + + bcopy(&d->rule, &rulenum, sizeof(rulenum)); + if (rulenum > fo->last) + return; + if (co->use_set) { + bcopy((char *)&d->rule + sizeof(uint16_t), + &set, sizeof(uint8_t)); + if (set != co->use_set - 1) + return; + } + if (rulenum >= fo->first) { + show_dyn_state(co, fo, bp, d); + printf("%s\n", bp->buf); + bp_flush(bp); + } +} + +static int +list_dyn_range(struct cmdline_opts *co, struct format_opts *fo, + struct buf_pr *bp, caddr_t base, size_t sz) +{ + + sz = foreach_state(co, fo, base, sz, list_dyn_state, bp); + return (sz); +} + void ipfw_list(int ac, char *av[], int show_counters) { - struct ip_fw *r; - ipfw_dyn_rule *dynrules, *d; - -#define NEXT(r) ((struct ip_fw *)((char *)r + RULESIZE(r))) - char *lim; - void *data = NULL; - int bcwidth, n, nbytes, nstat, ndyn, pcwidth, width; - int exitval = EX_OK; + ipfw_cfg_lheader *cfg; + struct format_opts sfo; + size_t sz; + int error; int lac; char **lav; - u_long rnum, last; + uint32_t rnum; char *endptr; - int seen = 0; - uint8_t set; - - const int ocmd = co.do_pipe ? IP_DUMMYNET_GET : IP_FW_GET; - int nalloc = 1024; /* start somewhere... 
*/ - - last = 0; if (co.test_only) { fprintf(stderr, "Testing only, list disabled\n"); @@ -2018,162 +2482,215 @@ ipfw_list(int ac, char *av[], int show_counters) ac--; av++; + memset(&sfo, 0, sizeof(sfo)); - /* get rules or pipes from kernel, resizing array as necessary */ - nbytes = nalloc; + /* Determine rule range to request */ + if (ac > 0) { + for (lac = ac, lav = av; lac != 0; lac--) { + rnum = strtoul(*lav++, &endptr, 10); + if (sfo.first == 0 || rnum < sfo.first) + sfo.first = rnum; - while (nbytes >= nalloc) { - nalloc = nalloc * 2 + 200; - nbytes = nalloc; - data = safe_realloc(data, nbytes); - if (do_cmd(ocmd, data, (uintptr_t)&nbytes) < 0) - err(EX_OSERR, "getsockopt(IP_%s_GET)", - co.do_pipe ? "DUMMYNET" : "FW"); + if (*endptr == '-') + rnum = strtoul(endptr + 1, &endptr, 10); + if (sfo.last == 0 || rnum > sfo.last) + sfo.last = rnum; + } } - /* - * Count static rules. They have variable size so we - * need to scan the list to count them. - */ - for (nstat = 1, r = data, lim = (char *)data + nbytes; - r->rulenum < IPFW_DEFAULT_RULE && (char *)r < lim; - ++nstat, r = NEXT(r) ) - ; /* nothing */ + /* get configuraion from kernel */ + cfg = NULL; + sfo.show_counters = show_counters; + sfo.flags = IPFW_CFG_GET_STATIC; + if (co.do_dynamic != 0) + sfo.flags |= IPFW_CFG_GET_STATES; + if (sfo.show_counters != 0) + sfo.flags |= IPFW_CFG_GET_COUNTERS; + if ((error = ipfw_get_config(&co, &sfo, &cfg, &sz)) != 0) + err(EX_OSERR, "retrieving config failed"); + + error = ipfw_show_config(&co, &sfo, cfg, sz, ac, av); + + free(cfg); + + if (error != EX_OK) + exit(error); +} + +static int +ipfw_show_config(struct cmdline_opts *co, struct format_opts *fo, + ipfw_cfg_lheader *cfg, size_t sz, int ac, char *av[]) +{ + caddr_t dynbase; + size_t dynsz; + int rcnt; + int exitval = EX_OK; + int lac; + char **lav; + char *endptr; + size_t read; + struct buf_pr bp; + ipfw_obj_ctlv *ctlv, *tstate; + ipfw_obj_tlv *rbase; /* - * Count dynamic rules. 
This is easier as they have - * fixed size. + * Handle tablenames TLV first, if any */ - r = NEXT(r); - dynrules = (ipfw_dyn_rule *)r ; - n = (char *)r - (char *)data; - ndyn = (nbytes - n) / sizeof *dynrules; - - /* if showing stats, figure out column widths ahead of time */ - bcwidth = pcwidth = 0; - if (show_counters) { - for (n = 0, r = data; n < nstat; n++, r = NEXT(r)) { - /* skip rules from another set */ - if (co.use_set && r->set != co.use_set - 1) - continue; - - /* packet counter */ - width = pr_u64(&r->pcnt, 0); - if (width > pcwidth) - pcwidth = width; + tstate = NULL; + rbase = NULL; + dynbase = NULL; + dynsz = 0; + read = sizeof(*cfg); + + fo->set_mask = cfg->set_mask; + + ctlv = (ipfw_obj_ctlv *)(cfg + 1); + + if (cfg->flags & IPFW_CFG_GET_STATIC) { + /* We've requested static rules */ + if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { + fo->tstate = ctlv; + read += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + + ctlv->head.length); + } - /* byte counter */ - width = pr_u64(&r->bcnt, 0); - if (width > bcwidth) - bcwidth = width; + if (ctlv->head.type == IPFW_TLV_RULE_LIST) { + rbase = (ipfw_obj_tlv *)(ctlv + 1); + rcnt = ctlv->count; + read += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + + ctlv->head.length); } } - if (co.do_dynamic && ndyn) { - for (n = 0, d = dynrules; n < ndyn; n++, d++) { - if (co.use_set) { - /* skip rules from another set */ - bcopy((char *)&d->rule + sizeof(uint16_t), - &set, sizeof(uint8_t)); - if (set != co.use_set - 1) - continue; - } - width = pr_u64(&d->pcnt, 0); - if (width > pcwidth) - pcwidth = width; - width = pr_u64(&d->bcnt, 0); - if (width > bcwidth) - bcwidth = width; - } + if ((cfg->flags & IPFW_CFG_GET_STATES) && (read != sz)) { + /* We may have some dynamic states */ + dynsz = sz - read; + /* Skip empty header */ + if (dynsz != sizeof(ipfw_obj_ctlv)) + dynbase = (caddr_t)ctlv; + else + dynsz = 0; } + + prepare_format_opts(co, fo, rbase, rcnt, dynbase, dynsz); + bp_alloc(&bp, 
4096); + /* if no rule numbers were specified, list all rules */ if (ac == 0) { - for (n = 0, r = data; n < nstat; n++, r = NEXT(r)) { - if (co.use_set && r->set != co.use_set - 1) - continue; - show_ipfw(r, pcwidth, bcwidth); - } + fo->first = 0; + fo->last = IPFW_DEFAULT_RULE; + list_static_range(co, fo, &bp, rbase, rcnt); - if (co.do_dynamic && ndyn) { - printf("## Dynamic rules (%d):\n", ndyn); - for (n = 0, d = dynrules; n < ndyn; n++, d++) { - if (co.use_set) { - bcopy((char *)&d->rule + sizeof(uint16_t), - &set, sizeof(uint8_t)); - if (set != co.use_set - 1) - continue; - } - show_dyn_ipfw(d, pcwidth, bcwidth); + if (co->do_dynamic && dynsz > 0) { + printf("## Dynamic rules (%d %lu):\n", fo->dcnt, dynsz); + list_dyn_range(co, fo, &bp, dynbase, dynsz); } - } - goto done; + + bp_free(&bp); + return (EX_OK); } /* display specific rules requested on command line */ - for (lac = ac, lav = av; lac != 0; lac--) { /* convert command line rule # */ - last = rnum = strtoul(*lav++, &endptr, 10); + fo->last = fo->first = strtoul(*lav++, &endptr, 10); if (*endptr == '-') - last = strtoul(endptr+1, &endptr, 10); + fo->last = strtoul(endptr + 1, &endptr, 10); if (*endptr) { exitval = EX_USAGE; warnx("invalid rule number: %s", *(lav - 1)); continue; } - for (n = seen = 0, r = data; n < nstat; n++, r = NEXT(r) ) { - if (r->rulenum > last) - break; - if (co.use_set && r->set != co.use_set - 1) - continue; - if (r->rulenum >= rnum && r->rulenum <= last) { - show_ipfw(r, pcwidth, bcwidth); - seen = 1; - } - } - if (!seen) { + + if (list_static_range(co, fo, &bp, rbase, rcnt) == 0) { /* give precedence to other error(s) */ if (exitval == EX_OK) exitval = EX_UNAVAILABLE; - warnx("rule %lu does not exist", rnum); + if (fo->first == fo->last) + warnx("rule %u does not exist", fo->first); + else + warnx("no rules in range %u-%u", + fo->first, fo->last); } } - if (co.do_dynamic && ndyn) { + if (co->do_dynamic && dynsz > 0) { printf("## Dynamic rules:\n"); for (lac = ac, lav = av; lac 
!= 0; lac--) { - last = rnum = strtoul(*lav++, &endptr, 10); + fo->last = fo->first = strtoul(*lav++, &endptr, 10); if (*endptr == '-') - last = strtoul(endptr+1, &endptr, 10); + fo->last = strtoul(endptr+1, &endptr, 10); if (*endptr) /* already warned */ continue; - for (n = 0, d = dynrules; n < ndyn; n++, d++) { - uint16_t rulenum; - - bcopy(&d->rule, &rulenum, sizeof(rulenum)); - if (rulenum > rnum) - break; - if (co.use_set) { - bcopy((char *)&d->rule + sizeof(uint16_t), - &set, sizeof(uint8_t)); - if (set != co.use_set - 1) - continue; - } - if (r->rulenum >= rnum && r->rulenum <= last) - show_dyn_ipfw(d, pcwidth, bcwidth); - } + list_dyn_range(co, fo, &bp, dynbase, dynsz); } } - ac = 0; + bp_free(&bp); + return (exitval); +} -done: - free(data); - if (exitval != EX_OK) - exit(exitval); -#undef NEXT +/* + * Retrieves current ipfw configuration of given type + * and stores its pointer to @pcfg. + * + * Caller is responsible for freeing @pcfg. + * + * Returns 0 on success. + */ + +static int +ipfw_get_config(struct cmdline_opts *co, struct format_opts *fo, + ipfw_cfg_lheader **pcfg, size_t *psize) +{ + ipfw_cfg_lheader *cfg; + size_t sz; + int error, i; + + + if (co->test_only != 0) { + fprintf(stderr, "Testing only, list disabled\n"); + return (0); + } + + /* Start with some data size */ + sz = 4096; + cfg = NULL; + + for (i = 0; i < 16; i++) { + if (cfg != NULL) + free(cfg); + if ((cfg = calloc(1, sz)) == NULL) + return (ENOMEM); + + cfg->flags = fo->flags; + cfg->start_rule = fo->first; + cfg->end_rule = fo->last; + + if ((error = do_get3(IP_FW_XGET, &cfg->opheader, &sz)) != 0) { + if (error != ENOMEM) { + free(cfg); + return (error); + } + + /* Buffer size is not enough. 
Try to increase */ + sz = sz * 2; + if (sz < cfg->size) + sz = cfg->size; + continue; + } + + *pcfg = cfg; + *psize = sz; + return (0); + } + + free(cfg); + return (ENOMEM); } static int @@ -2189,6 +2706,79 @@ lookup_host (char *host, struct in_addr *ipaddr) return(0); } +struct tidx { + ipfw_obj_ntlv *idx; + uint32_t count; + uint32_t size; + uint16_t counter; + uint8_t set; +}; + +static uint16_t +pack_table(struct tidx *tstate, char *name) +{ + int i; + ipfw_obj_ntlv *ntlv; + + if (table_check_name(name) != 0) + return (0); + + for (i = 0; i < tstate->count; i++) { + if (strcmp(tstate->idx[i].name, name) != 0) + continue; + if (tstate->idx[i].set != tstate->set) + continue; + + return (tstate->idx[i].idx); + } + + if (tstate->count + 1 > tstate->size) { + tstate->size += 4; + tstate->idx = realloc(tstate->idx, tstate->size * + sizeof(ipfw_obj_ntlv)); + if (tstate->idx == NULL) + return (0); + } + + ntlv = &tstate->idx[i]; + memset(ntlv, 0, sizeof(ipfw_obj_ntlv)); + strlcpy(ntlv->name, name, sizeof(ntlv->name)); + ntlv->head.type = IPFW_TLV_TBL_NAME; + ntlv->head.length = sizeof(ipfw_obj_ntlv); + ntlv->set = tstate->set; + ntlv->idx = ++tstate->counter; + tstate->count++; + + return (ntlv->idx); +} + +static void +fill_table(ipfw_insn *cmd, char *av, uint8_t opcode, struct tidx *tstate) +{ + uint32_t *d = ((ipfw_insn_u32 *)cmd)->d; + uint16_t uidx; + char *p; + + if ((p = strchr(av + 6, ')')) == NULL) + errx(EX_DATAERR, "forgotten parenthesis: '%s'", av); + *p = '\0'; + p = strchr(av + 6, ','); + if (p) + *p++ = '\0'; + + if ((uidx = pack_table(tstate, av + 6)) == 0) + errx(EX_DATAERR, "Invalid table name: %s", av + 6); + + cmd->opcode = opcode; + cmd->arg1 = uidx; + if (p) { + cmd->len |= F_INSN_SIZE(ipfw_insn_u32); + d[0] = strtoul(p, NULL, 0); + } else + cmd->len |= F_INSN_SIZE(ipfw_insn); +} + + /* * fills the addr and mask fields in the instruction as appropriate from av. * Update length as appropriate. 
@@ -2201,11 +2791,10 @@ lookup_host (char *host, struct in_addr *ipaddr) * We can have multiple comma-separated address/mask entries. */ static void -fill_ip(ipfw_insn_ip *cmd, char *av, int cblen) +fill_ip(ipfw_insn_ip *cmd, char *av, int cblen, struct tidx *tstate) { int len = 0; uint32_t *d = ((ipfw_insn_u32 *)cmd)->d; - uint32_t tables_max; cmd->o.len &= ~F_LEN_MASK; /* zero len */ @@ -2218,21 +2807,7 @@ fill_ip(ipfw_insn_ip *cmd, char *av, int cblen) } if (strncmp(av, "table(", 6) == 0) { - char *p = strchr(av + 6, ','); - - if (p) - *p++ = '\0'; - cmd->o.opcode = O_IP_DST_LOOKUP; - cmd->o.arg1 = strtoul(av + 6, NULL, 0); - tables_max = ipfw_get_tables_max(); - if (cmd->o.arg1 > tables_max) - errx(EX_USAGE, "The table number exceeds the maximum " - "allowed value (%u)", tables_max - 1); - if (p) { - cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); - d[0] = strtoul(p, NULL, 0); - } else - cmd->o.len |= F_INSN_SIZE(ipfw_insn); + fill_table(&cmd->o, av, O_IP_DST_LOOKUP, tstate); return; } @@ -2413,35 +2988,14 @@ n2mask(struct in6_addr *mask, int n) return; } -/* - * helper function to process a set of flags and set bits in the - * appropriate masks. 
- */ static void -fill_flags(ipfw_insn *cmd, enum ipfw_opcodes opcode, +fill_flags_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, struct _s_x *flags, char *p) { - uint8_t set=0, clear=0; + uint8_t set = 0, clear = 0; - while (p && *p) { - char *q; /* points to the separator */ - int val; - uint8_t *which; /* mask we are working on */ + fill_flags(flags, p, &set, &clear); - if (*p == '!') { - p++; - which = &clear; - } else - which = &set; - q = strchr(p, ','); - if (q) - *q++ = '\0'; - val = match_token(flags, p); - if (val <= 0) - errx(EX_DATAERR, "invalid flag %s", p); - *which |= (uint8_t)val; - p = q; - } cmd->opcode = opcode; cmd->len = (cmd->len & (F_NOT | F_OR)) | 1; cmd->arg1 = (set & 0xff) | ( (clear & 0xff) << 8); @@ -2455,9 +3009,11 @@ ipfw_delete(char *av[]) int i; int exitval = EX_OK; int do_set = 0; + ipfw_range_tlv rt; av++; NEED1("missing rule specification"); + memset(&rt, 0, sizeof(rt)); if ( *av && _substrcmp(*av, "set") == 0) { /* Do not allow using the following syntax: * ipfw set N delete set M @@ -2480,15 +3036,25 @@ ipfw_delete(char *av[]) } else if (co.do_pipe) { exitval = ipfw_delete_pipe(co.do_pipe, i); } else { - if (co.use_set) - rulenum = (i & 0xffff) | (5 << 24) | - ((co.use_set - 1) << 16); - else - rulenum = (i & 0xffff) | (do_set << 24); - i = do_cmd(IP_FW_DEL, &rulenum, sizeof rulenum); - if (i) { + if (do_set != 0) { + rt.set = i & 31; + rt.flags = IPFW_RCFLAG_SET; + } else { + rt.start_rule = i & 0xffff; + rt.end_rule = i & 0xffff; + if (rt.start_rule == 0 && rt.end_rule == 0) + rt.flags |= IPFW_RCFLAG_ALL; + else + rt.flags |= IPFW_RCFLAG_RANGE; + if (co.use_set != 0) { + rt.set = co.use_set - 1; + rt.flags |= IPFW_RCFLAG_SET; + } + } + i = do_range_cmd(IP_FW_XDEL, &rt); + if (i != 0) { exitval = EX_UNAVAILABLE; - warn("rule %u: setsockopt(IP_FW_DEL)", + warn("rule %u: setsockopt(IP_FW_XDEL)", rulenum); } } @@ -2506,8 +3072,11 @@ ipfw_delete(char *av[]) * patterns which match interfaces. 
*/ static void -fill_iface(ipfw_insn_if *cmd, char *arg, int cblen) +fill_iface(ipfw_insn_if *cmd, char *arg, int cblen, struct tidx *tstate) { + char *p; + uint16_t uidx; + cmd->name[0] = '\0'; cmd->o.len |= F_INSN_SIZE(ipfw_insn_if); @@ -2517,11 +3086,17 @@ fill_iface(ipfw_insn_if *cmd, char *arg, int cblen) if (strcmp(arg, "any") == 0) cmd->o.len = 0; /* effectively ignore this command */ else if (strncmp(arg, "table(", 6) == 0) { - char *p = strchr(arg + 6, ','); + if ((p = strchr(arg + 6, ')')) == NULL) + errx(EX_DATAERR, "forgotten parenthesis: '%s'", arg); + *p = '\0'; + p = strchr(arg + 6, ','); if (p) *p++ = '\0'; + if ((uidx = pack_table(tstate, arg + 6)) == 0) + errx(EX_DATAERR, "Invalid table name: %s", arg + 6); + cmd->name[0] = '\1'; /* Special value indicating table */ - cmd->p.glob = strtoul(arg + 6, NULL, 0); + cmd->p.kidx = uidx; } else if (!isdigit(*arg)) { strlcpy(cmd->name, arg, sizeof(cmd->name)); cmd->p.glob = strpbrk(arg, "*?[") != NULL ? 1 : 0; @@ -2735,9 +3310,9 @@ add_proto_compat(ipfw_insn *cmd, char *av, u_char *protop) } static ipfw_insn * -add_srcip(ipfw_insn *cmd, char *av, int cblen) +add_srcip(ipfw_insn *cmd, char *av, int cblen, struct tidx *tstate) { - fill_ip((ipfw_insn_ip *)cmd, av, cblen); + fill_ip((ipfw_insn_ip *)cmd, av, cblen, tstate); if (cmd->opcode == O_IP_DST_SET) /* set */ cmd->opcode = O_IP_SRC_SET; else if (cmd->opcode == O_IP_DST_LOOKUP) /* table */ @@ -2752,9 +3327,9 @@ add_srcip(ipfw_insn *cmd, char *av, int cblen) } static ipfw_insn * -add_dstip(ipfw_insn *cmd, char *av, int cblen) +add_dstip(ipfw_insn *cmd, char *av, int cblen, struct tidx *tstate) { - fill_ip((ipfw_insn_ip *)cmd, av, cblen); + fill_ip((ipfw_insn_ip *)cmd, av, cblen, tstate); if (cmd->opcode == O_IP_DST_SET) /* set */ ; else if (cmd->opcode == O_IP_DST_LOOKUP) /* table */ @@ -2783,7 +3358,7 @@ add_ports(ipfw_insn *cmd, char *av, u_char proto, int opcode, int cblen) } static ipfw_insn * -add_src(ipfw_insn *cmd, char *av, u_char proto, int cblen) 
+add_src(ipfw_insn *cmd, char *av, u_char proto, int cblen, struct tidx *tstate) { struct in6_addr a; char *host, *ch, buf[INET6_ADDRSTRLEN]; @@ -2806,7 +3381,7 @@ add_src(ipfw_insn *cmd, char *av, u_char proto, int cblen) /* XXX: should check for IPv4, not !IPv6 */ if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || inet_pton(AF_INET6, host, &a) != 1)) - ret = add_srcip(cmd, av, cblen); + ret = add_srcip(cmd, av, cblen, tstate); if (ret == NULL && strcmp(av, "any") != 0) ret = cmd; @@ -2814,7 +3389,7 @@ add_src(ipfw_insn *cmd, char *av, u_char proto, int cblen) } static ipfw_insn * -add_dst(ipfw_insn *cmd, char *av, u_char proto, int cblen) +add_dst(ipfw_insn *cmd, char *av, u_char proto, int cblen, struct tidx *tstate) { struct in6_addr a; char *host, *ch, buf[INET6_ADDRSTRLEN]; @@ -2837,7 +3412,7 @@ add_dst(ipfw_insn *cmd, char *av, u_char proto, int cblen) /* XXX: should check for IPv4, not !IPv6 */ if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || inet_pton(AF_INET6, host, &a) != 1)) - ret = add_dstip(cmd, av, cblen); + ret = add_dstip(cmd, av, cblen, tstate); if (ret == NULL && strcmp(av, "any") != 0) ret = cmd; @@ -2857,7 +3432,7 @@ add_dst(ipfw_insn *cmd, char *av, u_char proto, int cblen) * */ void -ipfw_add(char *av[]) +compile_rule(char *av[], uint32_t *rbuf, int *rbufsize, struct tidx *tstate) { /* * rules are added into the 'rulebuf' and then copied in @@ -2865,13 +3440,13 @@ ipfw_add(char *av[]) * Some things that need to go out of order (prob, action etc.) * go into actbuf[]. */ - static uint32_t rulebuf[255], actbuf[255], cmdbuf[255]; + static uint32_t actbuf[255], cmdbuf[255]; int rblen, ablen, cblen; ipfw_insn *src, *dst, *cmd, *action, *prev=NULL; ipfw_insn *first_cmd; /* first match pattern */ - struct ip_fw *rule; + struct ip_fw_rule *rule; /* * various flags used to record that we entered some fields. 
@@ -2891,14 +3466,14 @@ ipfw_add(char *av[]) bzero(actbuf, sizeof(actbuf)); /* actions go here */ bzero(cmdbuf, sizeof(cmdbuf)); - bzero(rulebuf, sizeof(rulebuf)); + bzero(rbuf, *rbufsize); - rule = (struct ip_fw *)rulebuf; + rule = (struct ip_fw_rule *)rbuf; cmd = (ipfw_insn *)cmdbuf; action = (ipfw_insn *)actbuf; - rblen = sizeof(rulebuf) / sizeof(rulebuf[0]); - rblen -= offsetof(struct ip_fw, cmd) / sizeof(rulebuf[0]); + rblen = *rbufsize / sizeof(uint32_t); + rblen -= sizeof(struct ip_fw_rule) / sizeof(uint32_t); ablen = sizeof(actbuf) / sizeof(actbuf[0]); cblen = sizeof(cmdbuf) / sizeof(cmdbuf[0]); cblen -= F_INSN_SIZE(ipfw_insn_u32) + 1; @@ -2920,6 +3495,7 @@ ipfw_add(char *av[]) if (set < 0 || set > RESVD_SET) errx(EX_DATAERR, "illegal set %s", av[1]); rule->set = set; + tstate->set = set; av += 2; } @@ -3025,11 +3601,11 @@ chkarg: errx(EX_USAGE, "missing argument for %s", *(av - 1)); if (isdigit(**av)) { action->arg1 = strtoul(*av, NULL, 10); - if (action->arg1 <= 0 || action->arg1 >= IP_FW_TABLEARG) + if (action->arg1 <= 0 || action->arg1 >= IP_FW_TARG) errx(EX_DATAERR, "illegal argument for %s", *(av - 1)); } else if (_substrcmp(*av, "tablearg") == 0) { - action->arg1 = IP_FW_TABLEARG; + action->arg1 = IP_FW_TARG; } else if (i == TOK_DIVERT || i == TOK_TEE) { struct servent *s; setservent(1); @@ -3153,7 +3729,7 @@ chkarg: action->opcode = O_SETFIB; NEED1("missing fib number"); if (_substrcmp(*av, "tablearg") == 0) { - action->arg1 = IP_FW_TABLEARG; + action->arg1 = IP_FW_TARG; } else { action->arg1 = strtoul(*av, NULL, 10); if (sysctlbyname("net.fibs", &numfibs, &intsize, @@ -3161,6 +3737,8 @@ chkarg: errx(EX_DATAERR, "fibs not suported.\n"); if (action->arg1 >= numfibs) /* Temporary */ errx(EX_DATAERR, "fib too large.\n"); + /* Add high-order bit to fib to make room for tablearg*/ + action->arg1 |= 0x8000; } av++; break; @@ -3173,13 +3751,16 @@ chkarg: action->opcode = O_SETDSCP; NEED1("missing DSCP code"); if (_substrcmp(*av, "tablearg") == 0) { - 
action->arg1 = IP_FW_TABLEARG; + action->arg1 = IP_FW_TARG; } else if (isalpha(*av[0])) { if ((code = match_token(f_ipdscp, *av)) == -1) errx(EX_DATAERR, "Unknown DSCP code"); action->arg1 = code; } else action->arg1 = strtoul(*av, NULL, 10); + /* Add high-order bit to DSCP to make room for tablearg */ + if (action->arg1 != IP_FW_TARG) + action->arg1 |= 0x8000; av++; break; } @@ -3386,7 +3967,7 @@ chkarg: OR_START(source_ip); NOT_BLOCK; /* optional "not" */ NEED1("missing source address"); - if (add_src(cmd, *av, proto, cblen)) { + if (add_src(cmd, *av, proto, cblen, tstate)) { av++; if (F_LEN(cmd) != 0) { /* ! any */ prev = cmd; @@ -3422,7 +4003,7 @@ chkarg: OR_START(dest_ip); NOT_BLOCK; /* optional "not" */ NEED1("missing dst address"); - if (add_dst(cmd, *av, proto, cblen)) { + if (add_dst(cmd, *av, proto, cblen, tstate)) { av++; if (F_LEN(cmd) != 0) { /* ! any */ prev = cmd; @@ -3451,7 +4032,7 @@ read_options: * nothing specified so far, store in the rule to ease * printout later. */ - rule->_pad = 1; + rule->flags |= IPFW_RULE_NOOPT; } prev = NULL; while ( av[0] != NULL ) { @@ -3529,7 +4110,7 @@ read_options: case TOK_VIA: NEED1("recv, xmit, via require interface name" " or address"); - fill_iface((ipfw_insn_if *)cmd, av[0], cblen); + fill_iface((ipfw_insn_if *)cmd, av[0], cblen, tstate); av++; if (F_LEN(cmd) == 0) /* not a valid address */ break; @@ -3604,13 +4185,13 @@ read_options: case TOK_IPOPTS: NEED1("missing argument for ipoptions"); - fill_flags(cmd, O_IPOPT, f_ipopts, *av); + fill_flags_cmd(cmd, O_IPOPT, f_ipopts, *av); av++; break; case TOK_IPTOS: NEED1("missing argument for iptos"); - fill_flags(cmd, O_IPTOS, f_iptos, *av); + fill_flags_cmd(cmd, O_IPTOS, f_iptos, *av); av++; break; @@ -3688,7 +4269,7 @@ read_options: case TOK_TCPOPTS: NEED1("missing argument for tcpoptions"); - fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av); + fill_flags_cmd(cmd, O_TCPOPTS, f_tcpopts, *av); av++; break; @@ -3715,7 +4296,7 @@ read_options: case TOK_TCPFLAGS: 
NEED1("missing argument for tcpflags"); cmd->opcode = O_TCPFLAGS; - fill_flags(cmd, O_TCPFLAGS, f_tcpflags, *av); + fill_flags_cmd(cmd, O_TCPFLAGS, f_tcpflags, *av); av++; break; @@ -3775,14 +4356,14 @@ read_options: case TOK_SRCIP: NEED1("missing source IP"); - if (add_srcip(cmd, *av, cblen)) { + if (add_srcip(cmd, *av, cblen, tstate)) { av++; } break; case TOK_DSTIP: NEED1("missing destination IP"); - if (add_dstip(cmd, *av, cblen)) { + if (add_dstip(cmd, *av, cblen, tstate)) { av++; } break; @@ -3901,7 +4482,6 @@ read_options: case TOK_LOOKUP: { ipfw_insn_u32 *c = (ipfw_insn_u32 *)cmd; - char *p; int j; if (!av[0] || !av[1]) @@ -3917,12 +4497,22 @@ read_options: errx(EX_USAGE, "format: cannot lookup on %s", *av); __PAST_END(c->d, 1) = j; // i converted to option av++; - cmd->arg1 = strtoul(*av, &p, 0); - if (p && *p) - errx(EX_USAGE, "format: lookup argument tablenum"); + + if ((j = pack_table(tstate, *av)) == 0) + errx(EX_DATAERR, "Invalid table name: %s", *av); + + cmd->arg1 = j; av++; } break; + case TOK_FLOW: + NEED1("missing table name"); + if (strncmp(*av, "table(", 6) != 0) + errx(EX_DATAERR, + "enclose table name into \"table()\""); + fill_table(cmd, *av, O_IP_FLOW_LOOKUP, tstate); + av++; + break; default: errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s); @@ -4024,34 +4614,142 @@ done: } rule->cmd_len = (uint32_t *)dst - (uint32_t *)(rule->cmd); - i = (char *)dst - (char *)rule; - if (do_cmd(IP_FW_ADD, rule, (uintptr_t)&i) == -1) - err(EX_UNAVAILABLE, "getsockopt(%s)", "IP_FW_ADD"); - if (!co.do_quiet) - show_ipfw(rule, 0, 0); + *rbufsize = (char *)dst - (char *)rule; +} + +/* + * Adds one or more rules to ipfw chain. 
+ * Data layout: + * Request: + * [ + * ip_fw3_opheader + * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1) + * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) [ ip_fw_rule ip_fw_insn ] x N ] (*2) (*3) + * ] + * Reply: + * [ + * ip_fw3_opheader + * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) + * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) [ ip_fw_rule ip_fw_insn ] x N ] + * ] + * + * Rules in reply are modified to store their actual ruleset number. + * + * (*1) TLVs inside IPFW_TLV_TBL_LIST needs to be sorted ascending + * accoring to their idx field and there has to be no duplicates. + * (*2) Numbered rules inside IPFW_TLV_RULE_LIST needs to be sorted ascending. + * (*3) Each ip_fw structure needs to be aligned to u64 boundary. + */ +void +ipfw_add(char *av[]) +{ + uint32_t rulebuf[1024]; + int rbufsize, default_off, tlen, rlen; + size_t sz; + struct tidx ts; + struct ip_fw_rule *rule; + caddr_t tbuf; + ip_fw3_opheader *op3; + ipfw_obj_ctlv *ctlv, *tstate; + + rbufsize = sizeof(rulebuf); + memset(&ts, 0, sizeof(ts)); + + /* Optimize case with no tables */ + default_off = sizeof(ipfw_obj_ctlv) + sizeof(ip_fw3_opheader); + op3 = (ip_fw3_opheader *)rulebuf; + ctlv = (ipfw_obj_ctlv *)(op3 + 1); + rule = (struct ip_fw_rule *)(ctlv + 1); + rbufsize -= default_off; + + compile_rule(av, (uint32_t *)rule, &rbufsize, &ts); + /* Align rule size to u64 boundary */ + rlen = roundup2(rbufsize, sizeof(uint64_t)); + + tbuf = NULL; + sz = 0; + tstate = NULL; + if (ts.count != 0) { + /* Some tables. 
We have to alloc more data */ + tlen = ts.count * sizeof(ipfw_obj_ntlv); + sz = default_off + sizeof(ipfw_obj_ctlv) + tlen + rlen; + + if ((tbuf = calloc(1, sz)) == NULL) + err(EX_UNAVAILABLE, "malloc() failed for IP_FW_ADD"); + op3 = (ip_fw3_opheader *)tbuf; + /* Tables first */ + ctlv = (ipfw_obj_ctlv *)(op3 + 1); + ctlv->head.type = IPFW_TLV_TBLNAME_LIST; + ctlv->head.length = sizeof(ipfw_obj_ctlv) + tlen; + ctlv->count = ts.count; + ctlv->objsize = sizeof(ipfw_obj_ntlv); + memcpy(ctlv + 1, ts.idx, tlen); + table_sort_ctlv(ctlv); + tstate = ctlv; + /* Rule next */ + ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); + ctlv->head.type = IPFW_TLV_RULE_LIST; + ctlv->head.length = sizeof(ipfw_obj_ctlv) + rlen; + ctlv->count = 1; + memcpy(ctlv + 1, rule, rbufsize); + } else { + /* Simply add header */ + sz = rlen + default_off; + memset(ctlv, 0, sizeof(*ctlv)); + ctlv->head.type = IPFW_TLV_RULE_LIST; + ctlv->head.length = sizeof(ipfw_obj_ctlv) + rlen; + ctlv->count = 1; + } + + if (do_get3(IP_FW_XADD, op3, &sz) != 0) + err(EX_UNAVAILABLE, "getsockopt(%s)", "IP_FW_XADD"); + + if (!co.do_quiet) { + struct format_opts sfo; + struct buf_pr bp; + memset(&sfo, 0, sizeof(sfo)); + sfo.tstate = tstate; + sfo.set_mask = (uint32_t)(-1); + bp_alloc(&bp, 4096); + show_static_rule(&co, &sfo, &bp, rule, NULL); + printf("%s", bp.buf); + bp_free(&bp); + } + + if (tbuf != NULL) + free(tbuf); + + if (ts.idx != NULL) + free(ts.idx); } /* * clear the counters or the log counters. + * optname has the following values: + * 0 (zero both counters and logging) + * 1 (zero logging only) */ void -ipfw_zero(int ac, char *av[], int optname /* 0 = IP_FW_ZERO, 1 = IP_FW_RESETLOG */) +ipfw_zero(int ac, char *av[], int optname) { - uint32_t arg, saved_arg; + ipfw_range_tlv rt; + uint32_t arg; int failed = EX_OK; char const *errstr; char const *name = optname ? "RESETLOG" : "ZERO"; - optname = optname ? IP_FW_RESETLOG : IP_FW_ZERO; + optname = optname ? 
IP_FW_XRESETLOG : IP_FW_XZERO; + memset(&rt, 0, sizeof(rt)); av++; ac--; - if (!ac) { + if (ac == 0) { /* clear all entries */ - if (do_cmd(optname, NULL, 0) < 0) - err(EX_UNAVAILABLE, "setsockopt(IP_FW_%s)", name); + rt.flags = IPFW_RCFLAG_ALL; + if (do_range_cmd(optname, &rt) < 0) + err(EX_UNAVAILABLE, "setsockopt(IP_FW_X%s)", name); if (!co.do_quiet) - printf("%s.\n", optname == IP_FW_ZERO ? + printf("%s.\n", optname == IP_FW_XZERO ? "Accounting cleared":"Logging counts reset"); return; @@ -4064,18 +4762,20 @@ ipfw_zero(int ac, char *av[], int optname /* 0 = IP_FW_ZERO, 1 = IP_FW_RESETLOG if (errstr) errx(EX_DATAERR, "invalid rule number %s\n", *av); - saved_arg = arg; - if (co.use_set) - arg |= (1 << 24) | ((co.use_set - 1) << 16); - av++; - ac--; - if (do_cmd(optname, &arg, sizeof(arg))) { - warn("rule %u: setsockopt(IP_FW_%s)", - saved_arg, name); + rt.start_rule = arg; + rt.end_rule = arg; + rt.flags |= IPFW_RCFLAG_RANGE; + if (co.use_set != 0) { + rt.set = co.use_set - 1; + rt.flags |= IPFW_RCFLAG_SET; + } + if (do_range_cmd(optname, &rt) != 0) { + warn("rule %u: setsockopt(IP_FW_X%s)", + arg, name); failed = EX_UNAVAILABLE; } else if (!co.do_quiet) - printf("Entry %d %s.\n", saved_arg, - optname == IP_FW_ZERO ? + printf("Entry %d %s.\n", arg, + optname == IP_FW_XZERO ? "cleared" : "logging count reset"); } else { errx(EX_USAGE, "invalid rule number ``%s''", *av); @@ -4088,7 +4788,7 @@ ipfw_zero(int ac, char *av[], int optname /* 0 = IP_FW_ZERO, 1 = IP_FW_RESETLOG void ipfw_flush(int force) { - int cmd = co.do_pipe ? 
IP_DUMMYNET_FLUSH : IP_FW_FLUSH; + ipfw_range_tlv rt; if (!force && !co.do_quiet) { /* need to ask user */ int c; @@ -4110,316 +4810,117 @@ ipfw_flush(int force) return; } /* `ipfw set N flush` - is the same that `ipfw delete set N` */ - if (co.use_set) { - uint32_t arg = ((co.use_set - 1) & 0xffff) | (1 << 24); - if (do_cmd(IP_FW_DEL, &arg, sizeof(arg)) < 0) - err(EX_UNAVAILABLE, "setsockopt(IP_FW_DEL)"); - } else if (do_cmd(cmd, NULL, 0) < 0) - err(EX_UNAVAILABLE, "setsockopt(IP_%s_FLUSH)", - co.do_pipe ? "DUMMYNET" : "FW"); + memset(&rt, 0, sizeof(rt)); + if (co.use_set != 0) { + rt.set = co.use_set - 1; + rt.flags = IPFW_RCFLAG_SET; + } else + rt.flags = IPFW_RCFLAG_ALL; + if (do_range_cmd(IP_FW_XDEL, &rt) != 0) + err(EX_UNAVAILABLE, "setsockopt(IP_FW_XDEL)"); if (!co.do_quiet) printf("Flushed all %s.\n", co.do_pipe ? "pipes" : "rules"); } +static struct _s_x intcmds[] = { + { "talist", TOK_TALIST }, + { "iflist", TOK_IFLIST }, + { NULL, 0 } +}; -static void table_list(uint16_t num, int need_header); -static void table_fill_xentry(char *arg, ipfw_table_xentry *xent); - -/* - * Retrieve maximum number of tables supported by ipfw(4) module. - */ -uint32_t -ipfw_get_tables_max() +void +ipfw_internal_handler(int ac, char *av[]) { - size_t len; - uint32_t tables_max; - - if (ipfw_tables_max != 0) - return (ipfw_tables_max); + int tcmd; - len = sizeof(tables_max); - if (sysctlbyname("net.inet.ip.fw.tables_max", &tables_max, &len, - NULL, 0) == -1) { - if (co.test_only) - tables_max = 128; /* Old conservative default */ - else - errx(1, "Can't determine maximum number of ipfw tables." 
- " Perhaps you forgot to load ipfw module?"); - } + ac--; av++; + NEED1("internal cmd required"); - ipfw_tables_max = tables_max; + if ((tcmd = match_token(intcmds, *av)) == -1) + errx(EX_USAGE, "invalid internal sub-cmd: %s", *av); - return (ipfw_tables_max); + switch (tcmd) { + case TOK_IFLIST: + ipfw_list_tifaces(); + break; + case TOK_TALIST: + ipfw_list_ta(ac, av); + break; + } } -/* - * This one handles all table-related commands - * ipfw table N add addr[/masklen] [value] - * ipfw table N delete addr[/masklen] - * ipfw table {N | all} flush - * ipfw table {N | all} list - */ -void -ipfw_table_handler(int ac, char *av[]) +static int +ipfw_get_tracked_ifaces(ipfw_obj_lheader **polh) { - ipfw_table_xentry xent; - int do_add; - int is_all; - uint32_t a; - uint32_t tables_max; + ipfw_obj_lheader req, *olh; + size_t sz; + int error; - tables_max = ipfw_get_tables_max(); + memset(&req, 0, sizeof(req)); + sz = sizeof(req); - memset(&xent, 0, sizeof(xent)); - - ac--; av++; - if (ac && isdigit(**av)) { - xent.tbl = atoi(*av); - is_all = 0; - ac--; av++; - } else if (ac && _substrcmp(*av, "all") == 0) { - xent.tbl = 0; - is_all = 1; - ac--; av++; - } else - errx(EX_USAGE, "table number or 'all' keyword required"); - if (xent.tbl >= tables_max) - errx(EX_USAGE, "The table number exceeds the maximum allowed " - "value (%d)", tables_max - 1); - NEED1("table needs command"); - if (is_all && _substrcmp(*av, "list") != 0 - && _substrcmp(*av, "flush") != 0) - errx(EX_USAGE, "table number required"); - - if (_substrcmp(*av, "add") == 0 || - _substrcmp(*av, "delete") == 0) { - do_add = **av == 'a'; - ac--; av++; - if (!ac) - errx(EX_USAGE, "address required"); - - table_fill_xentry(*av, &xent); - - ac--; av++; - if (do_add && ac) { - unsigned int tval; - /* isdigit is a bit of a hack here.. 
*/ - if (strchr(*av, (int)'.') == NULL && isdigit(**av)) { - xent.value = strtoul(*av, NULL, 0); - } else { - if (lookup_host(*av, (struct in_addr *)&tval) == 0) { - /* The value must be stored in host order * - * so that the values < 65k can be distinguished */ - xent.value = ntohl(tval); - } else { - errx(EX_NOHOST, "hostname ``%s'' unknown", *av); - } - } - } else - xent.value = 0; - if (do_setcmd3(do_add ? IP_FW_TABLE_XADD : IP_FW_TABLE_XDEL, - &xent, xent.len) < 0) { - /* If running silent, don't bomb out on these errors. */ - if (!(co.do_quiet && (errno == (do_add ? EEXIST : ESRCH)))) - err(EX_OSERR, "setsockopt(IP_FW_TABLE_%s)", - do_add ? "XADD" : "XDEL"); - /* In silent mode, react to a failed add by deleting */ - if (do_add) { - do_setcmd3(IP_FW_TABLE_XDEL, &xent, xent.len); - if (do_setcmd3(IP_FW_TABLE_XADD, &xent, xent.len) < 0) - err(EX_OSERR, - "setsockopt(IP_FW_TABLE_XADD)"); - } - } - } else if (_substrcmp(*av, "flush") == 0) { - a = is_all ? tables_max : (uint32_t)(xent.tbl + 1); - do { - if (do_cmd(IP_FW_TABLE_FLUSH, &xent.tbl, - sizeof(xent.tbl)) < 0) - err(EX_OSERR, "setsockopt(IP_FW_TABLE_FLUSH)"); - } while (++xent.tbl < a); - } else if (_substrcmp(*av, "list") == 0) { - a = is_all ? tables_max : (uint32_t)(xent.tbl + 1); - do { - table_list(xent.tbl, is_all); - } while (++xent.tbl < a); - } else - errx(EX_USAGE, "invalid table command %s", *av); -} - -static void -table_fill_xentry(char *arg, ipfw_table_xentry *xent) -{ - int addrlen, mask, masklen, type; - struct in6_addr *paddr; - uint32_t *pkey; - char *p; - uint32_t key; - - mask = 0; - type = 0; - addrlen = 0; - masklen = 0; - - /* - * Let's try to guess type by agrument. 
- * Possible types: - * 1) IPv4[/mask] - * 2) IPv6[/mask] - * 3) interface name - * 4) port, uid/gid or other u32 key (base 10 format) - * 5) hostname - */ - paddr = &xent->k.addr6; - if (ishexnumber(*arg) != 0 || *arg == ':') { - /* Remove / if exists */ - if ((p = strchr(arg, '/')) != NULL) { - *p = '\0'; - mask = atoi(p + 1); - } + error = do_get3(IP_FW_XIFLIST, &req.opheader, &sz); + if (error != 0 && error != ENOMEM) + return (error); - if (inet_pton(AF_INET, arg, paddr) == 1) { - if (p != NULL && mask > 32) - errx(EX_DATAERR, "bad IPv4 mask width: %s", - p + 1); + sz = req.size; + if ((olh = calloc(1, sz)) == NULL) + return (ENOMEM); - type = IPFW_TABLE_CIDR; - masklen = p ? mask : 32; - addrlen = sizeof(struct in_addr); - } else if (inet_pton(AF_INET6, arg, paddr) == 1) { - if (IN6_IS_ADDR_V4COMPAT(paddr)) - errx(EX_DATAERR, - "Use IPv4 instead of v4-compatible"); - if (p != NULL && mask > 128) - errx(EX_DATAERR, "bad IPv6 mask width: %s", - p + 1); - - type = IPFW_TABLE_CIDR; - masklen = p ? mask : 128; - addrlen = sizeof(struct in6_addr); - } else { - /* Port or any other key */ - /* Skip non-base 10 entries like 'fa1' */ - key = strtol(arg, &p, 10); - if (*p == '\0') { - pkey = (uint32_t *)paddr; - *pkey = htonl(key); - type = IPFW_TABLE_CIDR; - masklen = 32; - addrlen = sizeof(uint32_t); - } else if ((p != arg) && (*p == '.')) { - /* - * Warn on IPv4 address strings - * which are "valid" for inet_aton() but not - * in inet_pton(). - * - * Typical examples: '10.5' or '10.0.0.05' - */ - errx(EX_DATAERR, - "Invalid IPv4 address: %s", arg); - } - } + olh->size = sz; + if ((error = do_get3(IP_FW_XIFLIST, &olh->opheader, &sz)) != 0) { + free(olh); + return (error); } - if (type == 0 && strchr(arg, '.') == NULL) { - /* Assume interface name. 
Copy significant data only */ - mask = MIN(strlen(arg), IF_NAMESIZE - 1); - memcpy(xent->k.iface, arg, mask); - /* Set mask to exact match */ - masklen = 8 * IF_NAMESIZE; - type = IPFW_TABLE_INTERFACE; - addrlen = IF_NAMESIZE; - } + *polh = olh; + return (0); +} - if (type == 0) { - if (lookup_host(arg, (struct in_addr *)paddr) != 0) - errx(EX_NOHOST, "hostname ``%s'' unknown", arg); +static int +ifinfo_cmp(const void *a, const void *b) +{ + ipfw_iface_info *ia, *ib; - masklen = 32; - type = IPFW_TABLE_CIDR; - addrlen = sizeof(struct in_addr); - } + ia = (ipfw_iface_info *)a; + ib = (ipfw_iface_info *)b; - xent->type = type; - xent->masklen = masklen; - xent->len = offsetof(ipfw_table_xentry, k) + addrlen; + return (stringnum_cmp(ia->ifname, ib->ifname)); } +/* + * Retrieves the list of tracked interfaces from the kernel, + * sorts it by interface name and prints one line per interface. + * Exits via err() if the list cannot be retrieved. + */ static void -table_list(uint16_t num, int need_header) +ipfw_list_tifaces() { - ipfw_xtable *tbl; - ipfw_table_xentry *xent; - socklen_t l; - uint32_t *a, sz, tval; - char tbuf[128]; - struct in6_addr *addr6; - ip_fw3_opheader *op3; + ipfw_obj_lheader *olh; + ipfw_iface_info *info; + int i, error; - /* Prepend value with IP_FW3 header */ - l = sizeof(ip_fw3_opheader) + sizeof(uint32_t); - op3 = alloca(l); - /* Zero reserved fields */ - memset(op3, 0, sizeof(ip_fw3_opheader)); - a = (uint32_t *)(op3 + 1); - *a = num; - op3->opcode = IP_FW_TABLE_XGETSIZE; - if (do_cmd(IP_FW3, op3, (uintptr_t)&l) < 0) - err(EX_OSERR, "getsockopt(IP_FW_TABLE_XGETSIZE)"); - - /* If a is zero we have nothing to do, the table is empty. 
*/ - if (*a == 0) - return; - - l = *a; - tbl = safe_calloc(1, l); - tbl->opheader.opcode = IP_FW_TABLE_XLIST; - tbl->tbl = num; - if (do_cmd(IP_FW3, tbl, (uintptr_t)&l) < 0) - err(EX_OSERR, "getsockopt(IP_FW_TABLE_XLIST)"); - if (tbl->cnt && need_header) - printf("---table(%d)---\n", tbl->tbl); - sz = tbl->size - sizeof(ipfw_xtable); - xent = &tbl->xent[0]; - while (sz > 0) { - switch (tbl->type) { - case IPFW_TABLE_CIDR: - /* IPv4 or IPv6 prefixes */ - tval = xent->value; - addr6 = &xent->k.addr6; + if ((error = ipfw_get_tracked_ifaces(&olh)) != 0) + err(EX_OSERR, "Unable to request ipfw tracked interface list"); - if ((xent->flags & IPFW_TCF_INET) != 0) { - /* IPv4 address */ - inet_ntop(AF_INET, &addr6->s6_addr32[3], tbuf, sizeof(tbuf)); - } else { - /* IPv6 address */ - inet_ntop(AF_INET6, addr6, tbuf, sizeof(tbuf)); - } + qsort(olh + 1, olh->count, olh->objsize, ifinfo_cmp); - if (co.do_value_as_ip) { - tval = htonl(tval); - printf("%s/%u %s\n", tbuf, xent->masklen, - inet_ntoa(*(struct in_addr *)&tval)); - } else - printf("%s/%u %u\n", tbuf, xent->masklen, tval); - break; - case IPFW_TABLE_INTERFACE: - /* Interface names */ - tval = xent->value; - if (co.do_value_as_ip) { - tval = htonl(tval); - printf("%s %s\n", xent->k.iface, - inet_ntoa(*(struct in_addr *)&tval)); - } else - printf("%s %u\n", xent->k.iface, tval); - } - - if (sz < xent->len) - break; - sz -= xent->len; - xent = (ipfw_table_xentry *)((char *)xent + xent->len); + info = (ipfw_iface_info *)(olh + 1); + for (i = 0; i < olh->count; i++) { + if (info->flags & IPFW_IFFLAG_RESOLVED) + printf("%s ifindex: %d refcount: %u changes: %u\n", + info->ifname, info->ifindex, info->refcnt, + info->gencnt); + else + printf("%s ifindex: unresolved refcount: %u changes: %u\n", + info->ifname, info->refcnt, info->gencnt); + info = (ipfw_iface_info *)((caddr_t)info + olh->objsize); } - free(tbl); + free(olh); } + + + + diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h index 2301c40..926decd 100644 --- 
a/sbin/ipfw/ipfw2.h +++ b/sbin/ipfw/ipfw2.h @@ -71,6 +71,8 @@ struct _s_x { int x; }; +extern struct _s_x f_ipdscp[]; + enum tokens { TOK_NULL=0, @@ -205,7 +207,28 @@ enum tokens { TOK_LOOKUP, TOK_SOCKARG, TOK_SETDSCP, + TOK_FLOW, + TOK_IFLIST, + /* Table tokens */ + TOK_CREATE, + TOK_DESTROY, + TOK_LIST, + TOK_INFO, + TOK_DETAIL, + TOK_MODIFY, + TOK_FLUSH, + TOK_SWAP, + TOK_ADD, + TOK_DEL, + TOK_VALTYPE, + TOK_ALGO, + TOK_TALIST, + TOK_FTYPE, + TOK_ATOMIC, + TOK_LOCK, + TOK_UNLOCK, }; + /* * the following macro returns an error message if we run out of * arguments. @@ -213,7 +236,19 @@ enum tokens { #define NEED(_p, msg) {if (!_p) errx(EX_USAGE, msg);} #define NEED1(msg) {if (!(*av)) errx(EX_USAGE, msg);} -int pr_u64(uint64_t *pd, int width); +struct buf_pr { + char *buf; /* allocated buffer */ + char *ptr; /* current pointer */ + size_t size; /* total buffer size */ + size_t avail; /* available storage */ + size_t needed; /* length needed */ +}; + +int pr_u64(struct buf_pr *bp, uint64_t *pd, int width); +int bp_alloc(struct buf_pr *b, size_t size); +void bp_free(struct buf_pr *b); +int bprintf(struct buf_pr *b, char *format, ...); + /* memory allocation support */ void *safe_calloc(size_t number, size_t size); @@ -222,14 +257,21 @@ void *safe_realloc(void *ptr, size_t size); /* string comparison functions used for historical compatibility */ int _substrcmp(const char *str1, const char* str2); int _substrcmp2(const char *str1, const char* str2, const char* str3); +int stringnum_cmp(const char *a, const char *b); /* utility functions */ int match_token(struct _s_x *table, char *string); +int match_token_relaxed(struct _s_x *table, char *string); char const *match_value(struct _s_x *p, int value); +size_t concat_tokens(char *buf, size_t bufsize, struct _s_x *table, + char *delimiter); +void fill_flags(struct _s_x *flags, char *p, uint8_t *set, uint8_t *clear); +void print_flags_buffer(char *buf, size_t sz, struct _s_x *list, uint8_t set); +struct _ip_fw3_opheader; 
int do_cmd(int optname, void *optval, uintptr_t optlen); - -uint32_t ipfw_get_tables_max(void); +int do_set3(int optname, struct _ip_fw3_opheader *op3, uintptr_t optlen); +int do_get3(int optname, struct _ip_fw3_opheader *op3, size_t *optlen); struct in6_addr; void n2mask(struct in6_addr *mask, int n); @@ -268,12 +310,13 @@ void ipfw_delete(char *av[]); void ipfw_flush(int force); void ipfw_zero(int ac, char *av[], int optname); void ipfw_list(int ac, char *av[], int show_counters); +void ipfw_internal_handler(int ac, char *av[]); #ifdef PF /* altq.c */ void altq_set_enabled(int enabled); u_int32_t altq_name_to_qid(const char *name); -void print_altq_cmd(struct _ipfw_insn_altq *altqptr); +void print_altq_cmd(struct buf_pr *bp, struct _ipfw_insn_altq *altqptr); #else #define NO_ALTQ #endif @@ -285,10 +328,10 @@ int ipfw_delete_pipe(int pipe_or_queue, int n); /* ipv6.c */ void print_unreach6_code(uint16_t code); -void print_ip6(struct _ipfw_insn_ip6 *cmd, char const *s); -void print_flow6id(struct _ipfw_insn_u32 *cmd); -void print_icmp6types(struct _ipfw_insn_u32 *cmd); -void print_ext6hdr(struct _ipfw_insn *cmd ); +void print_ip6(struct buf_pr *bp, struct _ipfw_insn_ip6 *cmd, char const *s); +void print_flow6id(struct buf_pr *bp, struct _ipfw_insn_u32 *cmd); +void print_icmp6types(struct buf_pr *bp, struct _ipfw_insn_u32 *cmd); +void print_ext6hdr(struct buf_pr *bp, struct _ipfw_insn *cmd ); struct _ipfw_insn *add_srcip6(struct _ipfw_insn *cmd, char *av, int cblen); struct _ipfw_insn *add_dstip6(struct _ipfw_insn *cmd, char *av, int cblen); @@ -297,3 +340,11 @@ void fill_flow6(struct _ipfw_insn_u32 *cmd, char *av, int cblen); void fill_unreach6_code(u_short *codep, char *str); void fill_icmp6types(struct _ipfw_insn_icmp6 *cmd, char *av, int cblen); int fill_ext6hdr(struct _ipfw_insn *cmd, char *av); + +/* tables.c */ +struct _ipfw_obj_ctlv; +char *table_search_ctlv(struct _ipfw_obj_ctlv *ctlv, uint16_t idx); +void table_sort_ctlv(struct _ipfw_obj_ctlv *ctlv); +int 
table_check_name(char *tablename); +void ipfw_list_ta(int ac, char *av[]); + diff --git a/sbin/ipfw/ipv6.c b/sbin/ipfw/ipv6.c index ee9bb62..36ee675 100644 --- a/sbin/ipfw/ipv6.c +++ b/sbin/ipfw/ipv6.c @@ -85,21 +85,21 @@ print_unreach6_code(uint16_t code) * Print the ip address contained in a command. */ void -print_ip6(ipfw_insn_ip6 *cmd, char const *s) +print_ip6(struct buf_pr *bp, ipfw_insn_ip6 *cmd, char const *s) { struct hostent *he = NULL; int len = F_LEN((ipfw_insn *) cmd) - 1; struct in6_addr *a = &(cmd->addr6); char trad[255]; - printf("%s%s ", cmd->o.len & F_NOT ? " not": "", s); + bprintf(bp, "%s%s ", cmd->o.len & F_NOT ? " not": "", s); if (cmd->o.opcode == O_IP6_SRC_ME || cmd->o.opcode == O_IP6_DST_ME) { - printf("me6"); + bprintf(bp, "me6"); return; } if (cmd->o.opcode == O_IP6) { - printf(" ip6"); + bprintf(bp, " ip6"); return; } @@ -117,21 +117,21 @@ print_ip6(ipfw_insn_ip6 *cmd, char const *s) if (mb == 128 && co.do_resolv) he = gethostbyaddr((char *)a, sizeof(*a), AF_INET6); if (he != NULL) /* resolved to name */ - printf("%s", he->h_name); + bprintf(bp, "%s", he->h_name); else if (mb == 0) /* any */ - printf("any"); + bprintf(bp, "any"); else { /* numeric IP followed by some kind of mask */ if (inet_ntop(AF_INET6, a, trad, sizeof( trad ) ) == NULL) - printf("Error ntop in print_ip6\n"); - printf("%s", trad ); + bprintf(bp, "Error ntop in print_ip6\n"); + bprintf(bp, "%s", trad ); if (mb < 0) /* XXX not really legal... 
*/ - printf(":%s", + bprintf(bp, ":%s", inet_ntop(AF_INET6, &a[1], trad, sizeof(trad))); else if (mb < 128) - printf("/%d", mb); + bprintf(bp, "/%d", mb); } if (len > 2) - printf(","); + bprintf(bp, ","); } } @@ -165,32 +165,32 @@ fill_icmp6types(ipfw_insn_icmp6 *cmd, char *av, int cblen) void -print_icmp6types(ipfw_insn_u32 *cmd) +print_icmp6types(struct buf_pr *bp, ipfw_insn_u32 *cmd) { int i, j; char sep= ' '; - printf(" ip6 icmp6types"); + bprintf(bp, " ip6 icmp6types"); for (i = 0; i < 7; i++) for (j=0; j < 32; ++j) { if ( (cmd->d[i] & (1 << (j))) == 0) continue; - printf("%c%d", sep, (i*32 + j)); + bprintf(bp, "%c%d", sep, (i*32 + j)); sep = ','; } } void -print_flow6id( ipfw_insn_u32 *cmd) +print_flow6id(struct buf_pr *bp, ipfw_insn_u32 *cmd) { uint16_t i, limit = cmd->o.arg1; char sep = ','; - printf(" flow-id "); + bprintf(bp, " flow-id "); for( i=0; i < limit; ++i) { if (i == limit - 1) sep = ' '; - printf("%d%c", cmd->d[i], sep); + bprintf(bp, "%d%c", cmd->d[i], sep); } } @@ -265,41 +265,41 @@ fill_ext6hdr( ipfw_insn *cmd, char *av) } void -print_ext6hdr( ipfw_insn *cmd ) +print_ext6hdr(struct buf_pr *bp, ipfw_insn *cmd ) { char sep = ' '; - printf(" extension header:"); + bprintf(bp, " extension header:"); if (cmd->arg1 & EXT_FRAGMENT ) { - printf("%cfragmentation", sep); + bprintf(bp, "%cfragmentation", sep); sep = ','; } if (cmd->arg1 & EXT_HOPOPTS ) { - printf("%chop options", sep); + bprintf(bp, "%chop options", sep); sep = ','; } if (cmd->arg1 & EXT_ROUTING ) { - printf("%crouting options", sep); + bprintf(bp, "%crouting options", sep); sep = ','; } if (cmd->arg1 & EXT_RTHDR0 ) { - printf("%crthdr0", sep); + bprintf(bp, "%crthdr0", sep); sep = ','; } if (cmd->arg1 & EXT_RTHDR2 ) { - printf("%crthdr2", sep); + bprintf(bp, "%crthdr2", sep); sep = ','; } if (cmd->arg1 & EXT_DSTOPTS ) { - printf("%cdestination options", sep); + bprintf(bp, "%cdestination options", sep); sep = ','; } if (cmd->arg1 & EXT_AH ) { - printf("%cauthentication header", sep); + 
bprintf(bp, "%cauthentication header", sep); sep = ','; } if (cmd->arg1 & EXT_ESP ) { - printf("%cencapsulated security payload", sep); + bprintf(bp, "%cencapsulated security payload", sep); } } diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c index 82a299b..f25578f 100644 --- a/sbin/ipfw/main.c +++ b/sbin/ipfw/main.c @@ -436,6 +436,10 @@ ipfw_main(int oldac, char **oldav) ipfw_list(ac, av, do_acct); else if (_substrcmp(*av, "show") == 0) ipfw_list(ac, av, 1 /* show counters */); + else if (_substrcmp(*av, "table") == 0) + ipfw_table_handler(ac, av); + else if (_substrcmp(*av, "internal") == 0) + ipfw_internal_handler(ac, av); else errx(EX_USAGE, "bad command `%s'", *av); } diff --git a/sbin/ipfw/tables.c b/sbin/ipfw/tables.c new file mode 100644 index 0000000..b90b46f --- /dev/null +++ b/sbin/ipfw/tables.c @@ -0,0 +1,1781 @@ +/* + * Copyright (c) 2002-2003 Luigi Rizzo + * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Idea and grammar partially left from: + * Copyright (c) 1993 Daniel Boulet + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ * + * in-kernel tables support + * + * $FreeBSD: projects/ipfw/sbin/ipfw/ipfw2.c 267467 2014-06-14 10:58:39Z melifaro $ + */ + + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/sysctl.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <netdb.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> + +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip_fw.h> +#include <arpa/inet.h> + +#include "ipfw2.h" + +static void table_list(ipfw_xtable_info *i, int need_header); +static void table_modify_record(ipfw_obj_header *oh, int ac, char *av[], + int add, int quiet, int update, int atomic); +static int table_flush(ipfw_obj_header *oh); +static int table_destroy(ipfw_obj_header *oh); +static int table_do_create(ipfw_obj_header *oh, ipfw_xtable_info *i); +static int table_do_modify(ipfw_obj_header *oh, ipfw_xtable_info *i); +static int table_do_swap(ipfw_obj_header *oh, char *second); +static void table_create(ipfw_obj_header *oh, int ac, char *av[]); +static void table_modify(ipfw_obj_header *oh, int ac, char *av[]); +static void table_lookup(ipfw_obj_header *oh, int ac, char *av[]); +static void table_lock(ipfw_obj_header *oh, int lock); +static int table_swap(ipfw_obj_header *oh, char *second); +static int table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i); +static int table_show_info(ipfw_xtable_info *i, void *arg); +static void table_fill_ntlv(ipfw_obj_ntlv *ntlv, char *name, uint32_t set, + uint16_t uidx); + +static int table_flush_one(ipfw_xtable_info *i, void *arg); +static int table_show_one(ipfw_xtable_info *i, void *arg); +static int table_do_get_list(ipfw_xtable_info *i, ipfw_obj_header **poh); +static void table_show_list(ipfw_obj_header *oh, int need_header); +static void table_show_entry(ipfw_xtable_info *i, ipfw_obj_tentry *tent); + +static void tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, + char *key, int add, uint8_t *ptype, 
uint8_t *pvtype, ipfw_xtable_info *xi); +static void tentry_fill_value(ipfw_obj_header *oh, ipfw_obj_tentry *tent, + char *arg, uint8_t type, uint8_t vtype); + +typedef int (table_cb_t)(ipfw_xtable_info *i, void *arg); +static int tables_foreach(table_cb_t *f, void *arg, int sort); + +#ifndef s6_addr32 +#define s6_addr32 __u6_addr.__u6_addr32 +#endif + +static struct _s_x tabletypes[] = { + { "cidr", IPFW_TABLE_CIDR }, + { "iface", IPFW_TABLE_INTERFACE }, + { "number", IPFW_TABLE_NUMBER }, + { "flow", IPFW_TABLE_FLOW }, + { NULL, 0 } +}; + +static struct _s_x tablevaltypes[] = { + { "number", IPFW_VTYPE_U32 }, + { NULL, 0 } +}; + +static struct _s_x tablefvaltypes[] = { + { "ip", IPFW_VFTYPE_IP }, + { "number", IPFW_VFTYPE_U32 }, + { NULL, 0 } +}; + +static struct _s_x tablecmds[] = { + { "add", TOK_ADD }, + { "delete", TOK_DEL }, + { "create", TOK_CREATE }, + { "destroy", TOK_DESTROY }, + { "flush", TOK_FLUSH }, + { "modify", TOK_MODIFY }, + { "swap", TOK_SWAP }, + { "info", TOK_INFO }, + { "detail", TOK_DETAIL }, + { "list", TOK_LIST }, + { "lookup", TOK_LOOKUP }, + { "atomic", TOK_ATOMIC }, + { "lock", TOK_LOCK }, + { "unlock", TOK_UNLOCK }, + { NULL, 0 } +}; + +static int +lookup_host (char *host, struct in_addr *ipaddr) +{ + struct hostent *he; + + if (!inet_aton(host, ipaddr)) { + if ((he = gethostbyname(host)) == NULL) + return(-1); + *ipaddr = *(struct in_addr *)he->h_addr_list[0]; + } + return(0); +} + +static int +get_token(struct _s_x *table, char *string, char *errbase) +{ + int tcmd; + + if ((tcmd = match_token_relaxed(table, string)) < 0) + errx(EX_USAGE, "%s %s %s", + (tcmd == 0) ? "invalid" : "ambiguous", errbase, string); + + return (tcmd); +} + +/* + * This one handles all table-related commands + * ipfw table NAME create ... + * ipfw table NAME modify ... 
+ * ipfw table NAME destroy + * ipfw table NAME swap NAME + * ipfw table NAME lock + * ipfw table NAME unlock + * ipfw table NAME add addr[/masklen] [value] + * ipfw table NAME add [addr[/masklen] value] [addr[/masklen] value] .. + * ipfw table NAME delete addr[/masklen] [addr[/masklen]] .. + * ipfw table NAME lookup addr + * ipfw table {NAME | all} flush + * ipfw table {NAME | all} list + * ipfw table {NAME | all} info + * ipfw table {NAME | all} detail + */ +void +ipfw_table_handler(int ac, char *av[]) +{ + int do_add, is_all; + int atomic, error, tcmd; + ipfw_xtable_info i; + ipfw_obj_header oh; + char *tablename; + uint32_t set; + void *arg; + + memset(&oh, 0, sizeof(oh)); + is_all = 0; + if (co.use_set != 0) + set = co.use_set - 1; + else + set = 0; + + ac--; av++; + NEED1("table needs name"); + tablename = *av; + + if (table_check_name(tablename) == 0) { + table_fill_ntlv(&oh.ntlv, *av, set, 1); + oh.idx = 1; + } else { + if (strcmp(tablename, "all") == 0) + is_all = 1; + else + errx(EX_USAGE, "table name %s is invalid", tablename); + } + ac--; av++; + NEED1("table needs command"); + + tcmd = get_token(tablecmds, *av, "table command"); + /* Check if atomic operation was requested */ + atomic = 0; + if (tcmd == TOK_ATOMIC) { + ac--; av++; + NEED1("atomic needs command"); + tcmd = get_token(tablecmds, *av, "table command"); + switch (tcmd) { + case TOK_ADD: + break; + default: + errx(EX_USAGE, "atomic is not compatible with %s", *av); + } + atomic = 1; + } + + switch (tcmd) { + case TOK_LIST: + case TOK_INFO: + case TOK_DETAIL: + case TOK_FLUSH: + break; + default: + if (is_all != 0) + errx(EX_USAGE, "table name required"); + } + + switch (tcmd) { + case TOK_ADD: + case TOK_DEL: + do_add = **av == 'a'; + ac--; av++; + table_modify_record(&oh, ac, av, do_add, co.do_quiet, + co.do_quiet, atomic); + break; + case TOK_CREATE: + ac--; av++; + table_create(&oh, ac, av); + break; + case TOK_MODIFY: + ac--; av++; + table_modify(&oh, ac, av); + break; + case 
TOK_DESTROY: + if (table_destroy(&oh) != 0) + err(EX_OSERR, "failed to destroy table %s", tablename); + break; + case TOK_FLUSH: + if (is_all == 0) { + if ((error = table_flush(&oh)) != 0) + err(EX_OSERR, "failed to flush table %s info", + tablename); + } else { + error = tables_foreach(table_flush_one, &oh, 1); + if (error != 0) + err(EX_OSERR, "failed to flush tables list"); + } + break; + case TOK_SWAP: + ac--; av++; + NEED1("second table name required"); + table_swap(&oh, *av); + break; + case TOK_LOCK: + case TOK_UNLOCK: + table_lock(&oh, (tcmd == TOK_LOCK)); + break; + case TOK_DETAIL: + case TOK_INFO: + arg = (tcmd == TOK_DETAIL) ? (void *)1 : NULL; + if (is_all == 0) { + if ((error = table_get_info(&oh, &i)) != 0) + err(EX_OSERR, "failed to request table info"); + table_show_info(&i, arg); + } else { + error = tables_foreach(table_show_info, arg, 1); + if (error != 0) + err(EX_OSERR, "failed to request tables list"); + } + break; + case TOK_LIST: + if (is_all == 0) { + ipfw_xtable_info i; + if ((error = table_get_info(&oh, &i)) != 0) + err(EX_OSERR, "failed to request table info"); + table_show_one(&i, NULL); + } else { + error = tables_foreach(table_show_one, NULL, 1); + if (error != 0) + err(EX_OSERR, "failed to request tables list"); + } + break; + case TOK_LOOKUP: + ac--; av++; + table_lookup(&oh, ac, av); + break; + } +} + +static void +table_fill_ntlv(ipfw_obj_ntlv *ntlv, char *name, uint32_t set, uint16_t uidx) +{ + + ntlv->head.type = IPFW_TLV_TBL_NAME; + ntlv->head.length = sizeof(ipfw_obj_ntlv); + ntlv->idx = uidx; + ntlv->set = set; + strlcpy(ntlv->name, name, sizeof(ntlv->name)); +} + +static void +table_fill_objheader(ipfw_obj_header *oh, ipfw_xtable_info *i) +{ + + oh->idx = 1; + table_fill_ntlv(&oh->ntlv, i->tablename, i->set, 1); +} + +static struct _s_x tablenewcmds[] = { + { "type", TOK_TYPE }, + { "ftype", TOK_FTYPE }, + { "valtype", TOK_VALTYPE }, + { "algo", TOK_ALGO }, + { "limit", TOK_LIMIT }, + { "locked", TOK_LOCK }, + { NULL, 0 } 
+}; + +static struct _s_x flowtypecmds[] = { + { "src-ip", IPFW_TFFLAG_SRCIP }, + { "proto", IPFW_TFFLAG_PROTO }, + { "src-port", IPFW_TFFLAG_SRCPORT }, + { "dst-ip", IPFW_TFFLAG_DSTIP }, + { "dst-port", IPFW_TFFLAG_DSTPORT }, + { NULL, 0 } +}; + +int +table_parse_type(uint8_t ttype, char *p, uint8_t *tflags) +{ + uint8_t fset, fclear; + + /* Parse type options */ + switch(ttype) { + case IPFW_TABLE_FLOW: + fset = fclear = 0; + fill_flags(flowtypecmds, p, &fset, + &fclear); + *tflags = fset; + break; + default: + return (EX_USAGE); + } + + return (0); +} + +void +table_print_type(char *tbuf, size_t size, uint8_t type, uint8_t tflags) +{ + const char *tname; + int l; + + if ((tname = match_value(tabletypes, type)) == NULL) + tname = "unknown"; + + l = snprintf(tbuf, size, "%s", tname); + tbuf += l; + size -= l; + + switch(type) { + case IPFW_TABLE_FLOW: + if (tflags != 0) { + *tbuf++ = ':'; + l--; + print_flags_buffer(tbuf, size, flowtypecmds, tflags); + } + break; + } +} + +/* + * Creates new table + * + * ipfw table NAME create [ type { cidr | iface | u32 } ] + * [ valtype { number | ip | dscp } ] + * [ algo algoname ] + */ +static void +table_create(ipfw_obj_header *oh, int ac, char *av[]) +{ + ipfw_xtable_info xi; + int error, tcmd, val; + size_t sz; + char *p; + char tbuf[128]; + + sz = sizeof(tbuf); + memset(&xi, 0, sizeof(xi)); + + /* Set some defaults to preserve compatibility */ + xi.type = IPFW_TABLE_CIDR; + xi.vtype = IPFW_VTYPE_U32; + + while (ac > 0) { + tcmd = get_token(tablenewcmds, *av, "option"); + ac--; av++; + + switch (tcmd) { + case TOK_LIMIT: + NEED1("limit value required"); + xi.limit = strtol(*av, NULL, 10); + ac--; av++; + break; + case TOK_TYPE: + NEED1("table type required"); + /* Type may have suboptions after ':' */ + if ((p = strchr(*av, ':')) != NULL) + *p++ = '\0'; + val = match_token(tabletypes, *av); + if (val == -1) { + concat_tokens(tbuf, sizeof(tbuf), tabletypes, + ", "); + errx(EX_USAGE, + "Unknown tabletype: %s. 
Supported: %s", + *av, tbuf); + } + xi.type = val; + if (p != NULL) { + error = table_parse_type(val, p, &xi.tflags); + if (error != 0) + errx(EX_USAGE, + "Unsupported suboptions: %s", p); + } + ac--; av++; + break; + case TOK_VALTYPE: + NEED1("table value type required"); + val = match_token(tablevaltypes, *av); + if (val != -1) { + xi.vtype = val; + ac--; av++; + break; + } + concat_tokens(tbuf, sizeof(tbuf), tablevaltypes, ", "); + errx(EX_USAGE, "Unknown value type: %s. Supported: %s", + *av, tbuf); + break; + case TOK_FTYPE: + NEED1("table value format type required"); + val = match_token(tablefvaltypes, *av); + if (val != -1) { + xi.vftype = val; + ac--; av++; + break; + } + concat_tokens(tbuf, sizeof(tbuf), tablefvaltypes, ", "); + errx(EX_USAGE, "Unknown format type: %s. Supported: %s", + *av, tbuf); + break; + case TOK_ALGO: + NEED1("table algorithm name required"); + if (strlen(*av) > sizeof(xi.algoname)) + errx(EX_USAGE, "algorithm name too long"); + strlcpy(xi.algoname, *av, sizeof(xi.algoname)); + ac--; av++; + break; + case TOK_LOCK: + xi.flags |= IPFW_TGFLAGS_LOCKED; + break; + } + } + + if ((error = table_do_create(oh, &xi)) != 0) + err(EX_OSERR, "Table creation failed"); +} + +/* + * Creates new table + * + * Request: [ ipfw_obj_header ipfw_xtable_info ] + * + * Returns 0 on success. 
+ */ +static int +table_do_create(ipfw_obj_header *oh, ipfw_xtable_info *i) +{ + char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; + int error; + + memcpy(tbuf, oh, sizeof(*oh)); + memcpy(tbuf + sizeof(*oh), i, sizeof(*i)); + oh = (ipfw_obj_header *)tbuf; + + error = do_set3(IP_FW_TABLE_XCREATE, &oh->opheader, sizeof(tbuf)); + + return (error); +} + +/* + * Modifies existing table + * + * ipfw table NAME modify [ limit number ] [ ftype { number | ip } ] + */ +static void +table_modify(ipfw_obj_header *oh, int ac, char *av[]) +{ + ipfw_xtable_info xi; + int error, tcmd, val; + size_t sz; + char tbuf[128]; + + sz = sizeof(tbuf); + memset(&xi, 0, sizeof(xi)); + + while (ac > 0) { + tcmd = get_token(tablenewcmds, *av, "option"); + ac--; av++; + + switch (tcmd) { + case TOK_LIMIT: + NEED1("limit value required"); + xi.limit = strtol(*av, NULL, 10); + xi.mflags |= IPFW_TMFLAGS_LIMIT; + ac--; av++; + break; + case TOK_FTYPE: + NEED1("table value format type required"); + val = match_token(tablefvaltypes, *av); + if (val != -1) { + xi.vftype = val; + xi.mflags |= IPFW_TMFLAGS_FTYPE; + ac--; av++; + break; + } + concat_tokens(tbuf, sizeof(tbuf), tablefvaltypes, ", "); + errx(EX_USAGE, "Unknown value type: %s. Supported: %s", + *av, tbuf); + break; + } + } + + if ((error = table_do_modify(oh, &xi)) != 0) + err(EX_OSERR, "Table modification failed"); +} + +/* + * Modifies existing table. + * + * Request: [ ipfw_obj_header ipfw_xtable_info ] + * + * Returns 0 on success. 
+ */ +static int +table_do_modify(ipfw_obj_header *oh, ipfw_xtable_info *i) +{ + char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; + int error; + + memcpy(tbuf, oh, sizeof(*oh)); + memcpy(tbuf + sizeof(*oh), i, sizeof(*i)); + oh = (ipfw_obj_header *)tbuf; + + error = do_set3(IP_FW_TABLE_XMODIFY, &oh->opheader, sizeof(tbuf)); + + return (error); +} + +/* + * Locks or unlocks given table + */ +static void +table_lock(ipfw_obj_header *oh, int lock) +{ + ipfw_xtable_info xi; + int error; + + memset(&xi, 0, sizeof(xi)); + + xi.mflags |= IPFW_TMFLAGS_LOCK; + xi.flags |= (lock != 0) ? IPFW_TGFLAGS_LOCKED : 0; + + if ((error = table_do_modify(oh, &xi)) != 0) + err(EX_OSERR, "Table %s failed", lock != 0 ? "lock" : "unlock"); +} + +/* + * Destroys given table specified by @oh->ntlv. + * Returns 0 on success. + */ +static int +table_destroy(ipfw_obj_header *oh) +{ + + if (do_set3(IP_FW_TABLE_XDESTROY, &oh->opheader, sizeof(*oh)) != 0) + return (-1); + + return (0); +} + +/* + * Flushes given table specified by @oh->ntlv. + * Returns 0 on success. + */ +static int +table_flush(ipfw_obj_header *oh) +{ + + if (do_set3(IP_FW_TABLE_XFLUSH, &oh->opheader, sizeof(*oh)) != 0) + return (-1); + + return (0); +} + +static int +table_do_swap(ipfw_obj_header *oh, char *second) +{ + char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ntlv)]; + int error; + + memset(tbuf, 0, sizeof(tbuf)); + memcpy(tbuf, oh, sizeof(*oh)); + oh = (ipfw_obj_header *)tbuf; + table_fill_ntlv((ipfw_obj_ntlv *)(oh + 1), second, oh->ntlv.set, 1); + + error = do_set3(IP_FW_TABLE_XSWAP, &oh->opheader, sizeof(tbuf)); + + return (error); +} + +/* + * Swaps given table with @second one. 
+ */ +static int +table_swap(ipfw_obj_header *oh, char *second) +{ + int error; + + if (table_check_name(second) != 0) + errx(EX_USAGE, "table name %s is invalid", second); + + error = table_do_swap(oh, second); + + switch (error) { + case EINVAL: + errx(EX_USAGE, "Unable to swap table: check types"); + case EFBIG: + errx(EX_USAGE, "Unable to swap table: check limits"); + } + + return (0); +} + + +/* + * Retrieves info for the table specified by @oh->ntlv and stores + * it inside @i. + * Returns 0 on success. + */ +static int +table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i) +{ + char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; + int error; + size_t sz; + + sz = sizeof(tbuf); + memset(tbuf, 0, sizeof(tbuf)); + memcpy(tbuf, oh, sizeof(*oh)); + oh = (ipfw_obj_header *)tbuf; + + if ((error = do_get3(IP_FW_TABLE_XINFO, &oh->opheader, &sz)) != 0) + return (error); + + if (sz < sizeof(tbuf)) + return (EINVAL); + + *i = *(ipfw_xtable_info *)(oh + 1); + + return (0); +} + +static struct _s_x tablealgoclass[] = { + { "hash", IPFW_TACLASS_HASH }, + { "array", IPFW_TACLASS_ARRAY }, + { "radix", IPFW_TACLASS_RADIX }, + { NULL, 0 } +}; + +struct ta_cldata { + uint8_t taclass; + uint8_t spare4; + uint16_t itemsize; + uint16_t itemsize6; + uint32_t size; + uint32_t count; +}; + +/* + * Print global/per-AF table @i algorithm info. 
+ */ +static void +table_show_tainfo(ipfw_xtable_info *i, struct ta_cldata *d, + const char *af, const char *taclass) +{ + + switch (d->taclass) { + case IPFW_TACLASS_HASH: + case IPFW_TACLASS_ARRAY: + printf(" %salgorithm %s info\n", af, taclass); + if (d->itemsize == d->itemsize6) + printf(" size: %u items: %u itemsize: %u\n", + d->size, d->count, d->itemsize); + else + printf(" size: %u items: %u " + "itemsize4: %u itemsize6: %u\n", + d->size, d->count, + d->itemsize, d->itemsize6); + break; + case IPFW_TACLASS_RADIX: + printf(" %salgorithm %s info\n", af, taclass); + if (d->itemsize == d->itemsize6) + printf(" items: %u itemsize: %u\n", + d->count, d->itemsize); + else + printf(" items: %u " + "itemsize4: %u itemsize6: %u\n", + d->count, d->itemsize, d->itemsize6); + break; + default: + printf(" algo class: %s\n", taclass); + } +} + +/* + * Prints table info struct @i in human-readable form. + */ +static int +table_show_info(ipfw_xtable_info *i, void *arg) +{ + const char *vtype, *vftype; + ipfw_ta_tinfo *tainfo; + int afdata, afitem; + struct ta_cldata d; + char ttype[64], tvtype[64]; + + table_print_type(ttype, sizeof(ttype), i->type, i->tflags); + if ((vtype = match_value(tablevaltypes, i->vtype)) == NULL) + vtype = "unknown"; + if ((vftype = match_value(tablefvaltypes, i->vftype)) == NULL) + vftype = "unknown"; + if (strcmp(vtype, vftype) != 0) + snprintf(tvtype, sizeof(tvtype), "%s(%s)", vtype, vftype); + else + snprintf(tvtype, sizeof(tvtype), "%s", vtype); + + printf("--- table(%s), set(%u) ---\n", i->tablename, i->set); + if ((i->flags & IPFW_TGFLAGS_LOCKED) != 0) + printf(" kindex: %d, type: %s, locked\n", i->kidx, ttype); + else + printf(" kindex: %d, type: %s\n", i->kidx, ttype); + printf(" valtype: %s, references: %u\n", tvtype, i->refcnt); + printf(" algorithm: %s\n", i->algoname); + printf(" items: %u, size: %u\n", i->count, i->size); + if (i->limit > 0) + printf(" limit: %u\n", i->limit); + + /* Print algo-specific info if requested & set */ + if 
(arg == NULL) + return (0); + + if ((i->ta_info.flags & IPFW_TATFLAGS_DATA) == 0) + return (0); + tainfo = &i->ta_info; + + afdata = 0; + afitem = 0; + if (tainfo->flags & IPFW_TATFLAGS_AFDATA) + afdata = 1; + if (tainfo->flags & IPFW_TATFLAGS_AFITEM) + afitem = 1; + + memset(&d, 0, sizeof(d)); + d.taclass = tainfo->taclass4; + d.size = tainfo->size4; + d.count = tainfo->count4; + d.itemsize = tainfo->itemsize4; + if (afdata == 0 && afitem != 0) + d.itemsize6 = tainfo->itemsize6; + else + d.itemsize6 = d.itemsize; + if ((vtype = match_value(tablealgoclass, d.taclass)) == NULL) + vtype = "unknown"; + + if (afdata == 0) { + table_show_tainfo(i, &d, "", vtype); + } else { + table_show_tainfo(i, &d, "IPv4 ", vtype); + memset(&d, 0, sizeof(d)); + d.taclass = tainfo->taclass6; + if ((vtype = match_value(tablealgoclass, d.taclass)) == NULL) + vtype = "unknown"; + d.size = tainfo->size6; + d.count = tainfo->count6; + d.itemsize = tainfo->itemsize6; + d.itemsize6 = d.itemsize; + table_show_tainfo(i, &d, "IPv6 ", vtype); + } + + return (0); +} + + +/* + * Function wrappers which can be used either + * as is or as foreach function parameter. 
+ */ + +static int +table_show_one(ipfw_xtable_info *i, void *arg) +{ + ipfw_obj_header *oh; + int error; + + if ((error = table_do_get_list(i, &oh)) != 0) { + err(EX_OSERR, "Error requesting table %s list", i->tablename); + return (error); + } + + table_show_list(oh, 1); + + free(oh); + return (0); +} + +static int +table_flush_one(ipfw_xtable_info *i, void *arg) +{ + ipfw_obj_header *oh; + + oh = (ipfw_obj_header *)arg; + + table_fill_ntlv(&oh->ntlv, i->tablename, i->set, 1); + + return (table_flush(oh)); +} + +static int +table_do_modify_record(int cmd, ipfw_obj_header *oh, + ipfw_obj_tentry *tent, int count, int atomic) +{ + ipfw_obj_ctlv *ctlv; + ipfw_obj_tentry *tent_base; + caddr_t pbuf; + char xbuf[sizeof(*oh) + sizeof(ipfw_obj_ctlv) + sizeof(*tent)]; + int error, i; + size_t sz; + + sz = sizeof(*ctlv) + sizeof(*tent) * count; + if (count == 1) { + memset(xbuf, 0, sizeof(xbuf)); + pbuf = xbuf; + } else { + if ((pbuf = calloc(1, sizeof(*oh) + sz)) == NULL) + return (ENOMEM); + } + + memcpy(pbuf, oh, sizeof(*oh)); + oh = (ipfw_obj_header *)pbuf; + oh->opheader.version = 1; + + ctlv = (ipfw_obj_ctlv *)(oh + 1); + ctlv->count = count; + ctlv->head.length = sz; + if (atomic != 0) + ctlv->flags |= IPFW_CTF_ATOMIC; + + tent_base = tent; + memcpy(ctlv + 1, tent, sizeof(*tent) * count); + tent = (ipfw_obj_tentry *)(ctlv + 1); + for (i = 0; i < count; i++, tent++) { + tent->head.length = sizeof(ipfw_obj_tentry); + tent->idx = oh->idx; + } + + sz += sizeof(*oh); + error = do_get3(cmd, &oh->opheader, &sz); + tent = (ipfw_obj_tentry *)(ctlv + 1); + /* Copy result back to provided buffer */ + memcpy(tent_base, ctlv + 1, sizeof(*tent) * count); + + if (pbuf != xbuf) + free(pbuf); + + return (error); +} + +static void +table_modify_record(ipfw_obj_header *oh, int ac, char *av[], int add, + int quiet, int update, int atomic) +{ + ipfw_obj_tentry *ptent, tent, *tent_buf; + ipfw_xtable_info xi; + uint8_t type, vtype; + int cmd, count, error, i, ignored; + char *texterr, 
*etxt, *px; + + if (ac == 0) + errx(EX_USAGE, "address required"); + + if (add != 0) { + cmd = IP_FW_TABLE_XADD; + texterr = "Adding record failed"; + } else { + cmd = IP_FW_TABLE_XDEL; + texterr = "Deleting record failed"; + } + + /* + * Calculate number of entries: + * Assume [key val] x N for add + * and + * key x N for delete + */ + count = (add != 0) ? ac / 2 + 1 : ac; + + if (count <= 1) { + /* Adding single entry with/without value */ + memset(&tent, 0, sizeof(tent)); + tent_buf = &tent; + } else { + + if ((tent_buf = calloc(count, sizeof(tent))) == NULL) + errx(EX_OSERR, + "Unable to allocate memory for all entries"); + } + ptent = tent_buf; + + memset(&xi, 0, sizeof(xi)); + count = 0; + while (ac > 0) { + tentry_fill_key(oh, ptent, *av, add, &type, &vtype, &xi); + + /* + * compability layer: auto-create table if not exists + */ + if (xi.tablename[0] == '\0') { + xi.type = type; + xi.vtype = vtype; + strlcpy(xi.tablename, oh->ntlv.name, + sizeof(xi.tablename)); + fprintf(stderr, "DEPRECATED: inserting data info " + "non-existent table %s. (auto-created)\n", + xi.tablename); + table_do_create(oh, &xi); + } + + oh->ntlv.type = type; + ac--; av++; + + if (add != 0 && ac > 0) { + tentry_fill_value(oh, ptent, *av, type, vtype); + ac--; av++; + } + + if (update != 0) + ptent->head.flags |= IPFW_TF_UPDATE; + + count++; + ptent++; + } + + error = table_do_modify_record(cmd, oh, tent_buf, count, atomic); + + /* + * Compatibility stuff: do not yell on duplicate keys or + * failed deletions. 
+ */ + if (error == 0 || (error == EEXIST && add != 0) || + (error == ENOENT && add == 0)) { + if (quiet != 0) { + if (tent_buf != &tent) + free(tent_buf); + return; + } + } + + /* Report results back */ + ptent = tent_buf; + for (i = 0; i < count; ptent++, i++) { + ignored = 0; + switch (ptent->result) { + case IPFW_TR_ADDED: + px = "added"; + break; + case IPFW_TR_DELETED: + px = "deleted"; + break; + case IPFW_TR_UPDATED: + px = "updated"; + break; + case IPFW_TR_LIMIT: + px = "limit"; + ignored = 1; + break; + case IPFW_TR_ERROR: + px = "error"; + ignored = 1; + break; + case IPFW_TR_NOTFOUND: + px = "notfound"; + ignored = 1; + break; + case IPFW_TR_EXISTS: + px = "exists"; + ignored = 1; + break; + case IPFW_TR_IGNORED: + px = "ignored"; + ignored = 1; + break; + default: + px = "unknown"; + ignored = 1; + } + + if (error != 0 && atomic != 0 && ignored == 0) + printf("%s(reverted): ", px); + else + printf("%s: ", px); + + table_show_entry(&xi, ptent); + } + + if (tent_buf != &tent) + free(tent_buf); + + if (error == 0) + return; + + /* Try to provide more human-readable error */ + switch (error) { + case EEXIST: + etxt = "record already exists"; + break; + case EFBIG: + etxt = "limit hit"; + break; + case ESRCH: + etxt = "table not found"; + break; + case ENOENT: + etxt = "record not found"; + break; + case EACCES: + etxt = "table is locked"; + break; + default: + etxt = strerror(error); + } + + errx(EX_OSERR, "%s: %s", texterr, etxt); +} + +static int +table_do_lookup(ipfw_obj_header *oh, char *key, ipfw_xtable_info *xi, + ipfw_obj_tentry *xtent) +{ + char xbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_obj_tentry)]; + ipfw_obj_tentry *tent; + uint8_t type, vtype; + int error; + size_t sz; + + memcpy(xbuf, oh, sizeof(*oh)); + oh = (ipfw_obj_header *)xbuf; + tent = (ipfw_obj_tentry *)(oh + 1); + + memset(tent, 0, sizeof(*tent)); + tent->head.length = sizeof(*tent); + tent->idx = 1; + + tentry_fill_key(oh, tent, key, 0, &type, &vtype, xi); + oh->ntlv.type = type; + 
+ sz = sizeof(xbuf); + if ((error = do_get3(IP_FW_TABLE_XFIND, &oh->opheader, &sz)) != 0) + return (error); + + if (sz < sizeof(xbuf)) + return (EINVAL); + + *xtent = *tent; + + return (0); +} + +static void +table_lookup(ipfw_obj_header *oh, int ac, char *av[]) +{ + ipfw_obj_tentry xtent; + ipfw_xtable_info xi; + char key[64]; + int error; + + if (ac == 0) + errx(EX_USAGE, "address required"); + + strlcpy(key, *av, sizeof(key)); + + memset(&xi, 0, sizeof(xi)); + error = table_do_lookup(oh, key, &xi, &xtent); + + switch (error) { + case 0: + break; + case ESRCH: + errx(EX_UNAVAILABLE, "Table %s not found", oh->ntlv.name); + case ENOENT: + errx(EX_UNAVAILABLE, "Entry %s not found", *av); + case ENOTSUP: + errx(EX_UNAVAILABLE, "Table %s algo does not support " + "\"lookup\" method", oh->ntlv.name); + default: + err(EX_OSERR, "getsockopt(IP_FW_TABLE_XFIND)"); + } + + table_show_entry(&xi, &xtent); +} + +static void +tentry_fill_key_type(char *arg, ipfw_obj_tentry *tentry, uint8_t type, + uint8_t tflags) +{ + char *p, *pp; + int mask, af; + struct in6_addr *paddr, tmp; + struct tflow_entry *tfe; + uint32_t key, *pkey; + uint16_t port; + struct protoent *pent; + struct servent *sent; + int masklen; + + masklen = 0; + af = 0; + paddr = (struct in6_addr *)&tentry->k; + + switch (type) { + case IPFW_TABLE_CIDR: + /* Remove / if exists */ + if ((p = strchr(arg, '/')) != NULL) { + *p = '\0'; + mask = atoi(p + 1); + } + + if (inet_pton(AF_INET, arg, paddr) == 1) { + if (p != NULL && mask > 32) + errx(EX_DATAERR, "bad IPv4 mask width: %s", + p + 1); + + masklen = p ? mask : 32; + af = AF_INET; + } else if (inet_pton(AF_INET6, arg, paddr) == 1) { + if (IN6_IS_ADDR_V4COMPAT(paddr)) + errx(EX_DATAERR, + "Use IPv4 instead of v4-compatible"); + if (p != NULL && mask > 128) + errx(EX_DATAERR, "bad IPv6 mask width: %s", + p + 1); + + masklen = p ? 
mask : 128; + af = AF_INET6; + } else { + /* Assume FQDN */ + if (lookup_host(arg, (struct in_addr *)paddr) != 0) + errx(EX_NOHOST, "hostname ``%s'' unknown", arg); + + masklen = 32; + type = IPFW_TABLE_CIDR; + af = AF_INET; + } + break; + case IPFW_TABLE_INTERFACE: + /* Assume interface name. Copy significant data only */ + mask = MIN(strlen(arg), IF_NAMESIZE - 1); + memcpy(paddr, arg, mask); + /* Set mask to exact match */ + masklen = 8 * IF_NAMESIZE; + break; + case IPFW_TABLE_NUMBER: + /* Port or any other key */ + key = strtol(arg, &p, 10); + if (*p != '\0') + errx(EX_DATAERR, "Invalid number: %s", arg); + + pkey = (uint32_t *)paddr; + *pkey = key; + masklen = 32; + break; + case IPFW_TABLE_FLOW: + /* Assume [src-ip][,proto][,src-port][,dst-ip][,dst-port] */ + tfe = &tentry->k.flow; + af = 0; + + /* Handle <ipv4|ipv6> */ + if ((tflags & IPFW_TFFLAG_SRCIP) != 0) { + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + /* Determine family using temporary storage */ + if (inet_pton(AF_INET, arg, &tmp) == 1) { + if (af != 0 && af != AF_INET) + errx(EX_DATAERR, + "Inconsistent address family\n"); + af = AF_INET; + memcpy(&tfe->a.a4.sip, &tmp, 4); + } else if (inet_pton(AF_INET6, arg, &tmp) == 1) { + if (af != 0 && af != AF_INET6) + errx(EX_DATAERR, + "Inconsistent address family\n"); + af = AF_INET6; + memcpy(&tfe->a.a6.sip6, &tmp, 16); + } + + arg = p; + } + + /* Handle <proto-num|proto-name> */ + if ((tflags & IPFW_TFFLAG_PROTO) != 0) { + if (arg == NULL) + errx(EX_DATAERR, "invalid key: proto missing"); + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + + key = strtol(arg, &pp, 10); + if (*pp != '\0') { + if ((pent = getprotobyname(arg)) == NULL) + errx(EX_DATAERR, "Unknown proto: %s", + arg); + else + key = pent->p_proto; + } + + if (key > 255) + errx(EX_DATAERR, "Bad protocol number: %u",key); + + tfe->proto = key; + + arg = p; + } + + /* Handle <port-num|service-name> */ + if ((tflags & IPFW_TFFLAG_SRCPORT) != 0) { + if (arg == NULL) + errx(EX_DATAERR, 
"invalid key: src port missing"); + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + + if ((port = htons(strtol(arg, NULL, 10))) == 0) { + if ((sent = getservbyname(arg, NULL)) == NULL) + errx(EX_DATAERR, "Unknown service: %s", + arg); + else + key = sent->s_port; + } + + tfe->sport = port; + + arg = p; + } + + /* Handle <ipv4|ipv6>*/ + if ((tflags & IPFW_TFFLAG_DSTIP) != 0) { + if (arg == NULL) + errx(EX_DATAERR, "invalid key: dst ip missing"); + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + /* Determine family using temporary storage */ + if (inet_pton(AF_INET, arg, &tmp) == 1) { + if (af != 0 && af != AF_INET) + errx(EX_DATAERR, + "Inconsistent address family"); + af = AF_INET; + memcpy(&tfe->a.a4.dip, &tmp, 4); + } else if (inet_pton(AF_INET6, arg, &tmp) == 1) { + if (af != 0 && af != AF_INET6) + errx(EX_DATAERR, + "Inconsistent address family"); + af = AF_INET6; + memcpy(&tfe->a.a6.dip6, &tmp, 16); + } + + arg = p; + } + + /* Handle <port-num|service-name> */ + if ((tflags & IPFW_TFFLAG_DSTPORT) != 0) { + if (arg == NULL) + errx(EX_DATAERR, "invalid key: dst port missing"); + if ((p = strchr(arg, ',')) != NULL) + *p++ = '\0'; + + if ((port = htons(strtol(arg, NULL, 10))) == 0) { + if ((sent = getservbyname(arg, NULL)) == NULL) + errx(EX_DATAERR, "Unknown service: %s", + arg); + else + key = sent->s_port; + } + + tfe->dport = port; + + arg = p; + } + + tfe->af = af; + + break; + + default: + errx(EX_DATAERR, "Unsupported table type: %d", type); + } + + tentry->subtype = af; + tentry->masklen = masklen; +} + +static void +tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *key, + int add, uint8_t *ptype, uint8_t *pvtype, ipfw_xtable_info *xi) +{ + uint8_t type, tflags, vtype; + int error; + char *del; + + type = 0; + tflags = 0; + vtype = 0; + + if (xi->tablename[0] == '\0') + error = table_get_info(oh, xi); + else + error = 0; + + if (error == 0) { + /* Table found. 
*/ + type = xi->type; + tflags = xi->tflags; + vtype = xi->vtype; + } else { + if (error != ESRCH) + errx(EX_OSERR, "Error requesting table %s info", + oh->ntlv.name); + if (add == 0) + errx(EX_DATAERR, "Table %s does not exist", + oh->ntlv.name); + /* + * Table does not exist. + * Compability layer: try to interpret data as CIDR + * before failing. + */ + if ((del = strchr(key, '/')) != NULL) + *del = '\0'; + if (inet_pton(AF_INET, key, &tent->k.addr6) == 1 || + inet_pton(AF_INET6, key, &tent->k.addr6) == 1) { + /* OK Prepare and send */ + type = IPFW_TABLE_CIDR; + /* + * XXX: Value type is forced to be u32. + * This should be changed for MFC. + */ + vtype = IPFW_VTYPE_U32; + } else { + /* Inknown key */ + errx(EX_USAGE, "Table %s does not exist, cannot guess " + "key '%s' type", oh->ntlv.name, key); + } + if (del != NULL) + *del = '/'; + } + + tentry_fill_key_type(key, tent, type, tflags); + + *ptype = type; + *pvtype = vtype; +} + +static void +tentry_fill_value(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *arg, + uint8_t type, uint8_t vtype) +{ + uint32_t val; + char *p; + + /* Try to interpret as number first */ + tent->value = strtoul(arg, &p, 0); + if (*p == '\0') + return; + if (inet_pton(AF_INET, arg, &val) == 1) { + tent->value = ntohl(val); + return; + } + /* Try hostname */ + if (lookup_host(arg, (struct in_addr *)&tent->value) == 0) + return; + errx(EX_OSERR, "Unable to parse value %s", arg); +#if 0 + switch (vtype) { + case IPFW_VTYPE_U32: + tent->value = strtoul(arg, &p, 0); + if (*p != '\0') + errx(EX_USAGE, "Invalid number: %s", arg); + break; + case IPFW_VTYPE_IP: + if (inet_pton(AF_INET, arg, &tent->value) == 1) + break; + /* Try hostname */ + if (lookup_host(arg, (struct in_addr *)&tent->value) != 0) + errx(EX_USAGE, "Invalid IPv4 address: %s", arg); + break; + case IPFW_VTYPE_DSCP: + if (isalpha(*arg)) { + if ((code = match_token(f_ipdscp, arg)) == -1) + errx(EX_DATAERR, "Unknown DSCP code"); + } else { + code = strtoul(arg, NULL, 10); + if 
(code < 0 || code > 63) + errx(EX_DATAERR, "Invalid DSCP value"); + } + tent->value = code; + break; + default: + errx(EX_OSERR, "Unsupported format type %d", vtype); + } +#endif +} + +/* + * Compare table names. + * Honor number comparison. + */ +static int +tablename_cmp(const void *a, const void *b) +{ + ipfw_xtable_info *ia, *ib; + + ia = (ipfw_xtable_info *)a; + ib = (ipfw_xtable_info *)b; + + return (stringnum_cmp(ia->tablename, ib->tablename)); +} + +/* + * Retrieves table list from kernel, + * optionally sorts it and calls requested function for each table. + * Returns 0 on success. + */ +static int +tables_foreach(table_cb_t *f, void *arg, int sort) +{ + ipfw_obj_lheader *olh; + ipfw_xtable_info *info; + size_t sz; + int i, error; + + /* Start with reasonable default */ + sz = sizeof(*olh) + 16 * sizeof(ipfw_xtable_info); + + for (;;) { + if ((olh = calloc(1, sz)) == NULL) + return (ENOMEM); + + olh->size = sz; + error = do_get3(IP_FW_TABLES_XLIST, &olh->opheader, &sz); + if (error == ENOMEM) { + sz = olh->size; + free(olh); + continue; + } else if (error != 0) { + free(olh); + return (error); + } + + if (sort != 0) + qsort(olh + 1, olh->count, olh->objsize, tablename_cmp); + + info = (ipfw_xtable_info *)(olh + 1); + for (i = 0; i < olh->count; i++) { + error = f(info, arg); /* Ignore errors for now */ + info = (ipfw_xtable_info *)((caddr_t)info + olh->objsize); + } + + free(olh); + break; + } + + return (0); +} + + +/* + * Retrieves all entries for given table @i in + * eXtended format. Allocate buffer large enough + * to store result. Called needs to free it later. + * + * Returns 0 on success. 
+ */ +static int +table_do_get_list(ipfw_xtable_info *i, ipfw_obj_header **poh) +{ + ipfw_obj_header *oh; + size_t sz; + int error, c; + + sz = 0; + oh = NULL; + error = 0; + for (c = 0; c < 8; c++) { + if (sz < i->size) + sz = i->size + 44; + if (oh != NULL) + free(oh); + if ((oh = calloc(1, sz)) == NULL) + continue; + table_fill_objheader(oh, i); + oh->opheader.version = 1; /* Current version */ + error = do_get3(IP_FW_TABLE_XLIST, &oh->opheader, &sz); + + if (error == 0) { + *poh = oh; + return (0); + } + + if (error != ENOMEM) + break; + } + free(oh); + + return (error); +} + +/* + * Shows all entries from @oh in human-readable format + */ +static void +table_show_list(ipfw_obj_header *oh, int need_header) +{ + ipfw_obj_tentry *tent; + uint32_t count; + ipfw_xtable_info *i; + + i = (ipfw_xtable_info *)(oh + 1); + tent = (ipfw_obj_tentry *)(i + 1); + + if (need_header) + printf("--- table(%s), set(%u) ---\n", i->tablename, i->set); + + count = i->count; + while (count > 0) { + table_show_entry(i, tent); + tent = (ipfw_obj_tentry *)((caddr_t)tent + tent->head.length); + count--; + } +} + +static void +table_show_entry(ipfw_xtable_info *i, ipfw_obj_tentry *tent) +{ + char *comma, tbuf[128], pval[32]; + void *paddr; + uint32_t tval; + struct tflow_entry *tfe; + + tval = tent->value; + + if (co.do_value_as_ip || i->vftype == IPFW_VFTYPE_IP) { + tval = htonl(tval); + inet_ntop(AF_INET, &tval, pval, sizeof(pval)); + } else + snprintf(pval, sizeof(pval), "%u", tval); + + switch (i->type) { + case IPFW_TABLE_CIDR: + /* IPv4 or IPv6 prefixes */ + inet_ntop(tent->subtype, &tent->k, tbuf, sizeof(tbuf)); + printf("%s/%u %s\n", tbuf, tent->masklen, pval); + break; + case IPFW_TABLE_INTERFACE: + /* Interface names */ + printf("%s %s\n", tent->k.iface, pval); + break; + case IPFW_TABLE_NUMBER: + /* numbers */ + printf("%u %s\n", tent->k.key, pval); + break; + case IPFW_TABLE_FLOW: + /* flows */ + tfe = &tent->k.flow; + comma = ""; + + if ((i->tflags & IPFW_TFFLAG_SRCIP) != 0) 
{ + if (tfe->af == AF_INET) + paddr = &tfe->a.a4.sip; + else + paddr = &tfe->a.a6.sip6; + + inet_ntop(tfe->af, paddr, tbuf, sizeof(tbuf)); + printf("%s%s", comma, tbuf); + comma = ","; + } + + if ((i->tflags & IPFW_TFFLAG_PROTO) != 0) { + printf("%s%d", comma, tfe->proto); + comma = ","; + } + + if ((i->tflags & IPFW_TFFLAG_SRCPORT) != 0) { + printf("%s%d", comma, ntohs(tfe->sport)); + comma = ","; + } + if ((i->tflags & IPFW_TFFLAG_DSTIP) != 0) { + if (tfe->af == AF_INET) + paddr = &tfe->a.a4.dip; + else + paddr = &tfe->a.a6.dip6; + + inet_ntop(tfe->af, paddr, tbuf, sizeof(tbuf)); + printf("%s%s", comma, tbuf); + comma = ","; + } + + if ((i->tflags & IPFW_TFFLAG_DSTPORT) != 0) { + printf("%s%d", comma, ntohs(tfe->dport)); + comma = ","; + } + + printf(" %s\n", pval); + } +} + +static int +table_do_get_algolist(ipfw_obj_lheader **polh) +{ + ipfw_obj_lheader req, *olh; + size_t sz; + int error; + + memset(&req, 0, sizeof(req)); + sz = sizeof(req); + + error = do_get3(IP_FW_TABLES_ALIST, &req.opheader, &sz); + if (error != 0 && error != ENOMEM) + return (error); + + sz = req.size; + if ((olh = calloc(1, sz)) == NULL) + return (ENOMEM); + + olh->size = sz; + if ((error = do_get3(IP_FW_TABLES_ALIST, &olh->opheader, &sz)) != 0) { + free(olh); + return (error); + } + + *polh = olh; + return (0); +} + +void +ipfw_list_ta(int ac, char *av[]) +{ + ipfw_obj_lheader *olh; + ipfw_ta_info *info; + int error, i; + const char *atype; + + error = table_do_get_algolist(&olh); + if (error != 0) + err(EX_OSERR, "Unable to request algorithm list"); + + info = (ipfw_ta_info *)(olh + 1); + for (i = 0; i < olh->count; i++) { + if ((atype = match_value(tabletypes, info->type)) == NULL) + atype = "unknown"; + printf("--- %s ---\n", info->algoname); + printf(" type: %s\n refcount: %u\n", atype, info->refcnt); + + info = (ipfw_ta_info *)((caddr_t)info + olh->objsize); + } + + free(olh); +} + +int +compare_ntlv(const void *_a, const void *_b) +{ + ipfw_obj_ntlv *a, *b; + + a = (ipfw_obj_ntlv 
*)_a; + b = (ipfw_obj_ntlv *)_b; + + if (a->set < b->set) + return (-1); + else if (a->set > b->set) + return (1); + + if (a->idx < b->idx) + return (-1); + else if (a->idx > b->idx) + return (1); + + return (0); +} + +int +compare_kntlv(const void *k, const void *v) +{ + ipfw_obj_ntlv *ntlv; + uint16_t key; + + key = *((uint16_t *)k); + ntlv = (ipfw_obj_ntlv *)v; + + if (key < ntlv->idx) + return (-1); + else if (key > ntlv->idx) + return (1); + + return (0); +} + +/* + * Finds table name in @ctlv by @idx. + * Uses the following facts: + * 1) All TLVs are the same size + * 2) Kernel implementation provides already sorted list. + * + * Returns table name or NULL. + */ +char * +table_search_ctlv(ipfw_obj_ctlv *ctlv, uint16_t idx) +{ + ipfw_obj_ntlv *ntlv; + + ntlv = bsearch(&idx, (ctlv + 1), ctlv->count, ctlv->objsize, + compare_kntlv); + + if (ntlv != 0) + return (ntlv->name); + + return (NULL); +} + +void +table_sort_ctlv(ipfw_obj_ctlv *ctlv) +{ + + qsort(ctlv + 1, ctlv->count, ctlv->objsize, compare_ntlv); +} + +int +table_check_name(char *tablename) +{ + int c, i, l; + + /* + * Check if tablename is null-terminated and contains + * valid symbols only. 
Valid mask is: + * [a-zA-Z0-9\-_\.]{1,63} + */ + l = strlen(tablename); + if (l == 0 || l >= 64) + return (EINVAL); + for (i = 0; i < l; i++) { + c = tablename[i]; + if (isalpha(c) || isdigit(c) || c == '_' || + c == '-' || c == '.') + continue; + return (EINVAL); + } + + /* Restrict some 'special' names */ + if (strcmp(tablename, "all") == 0) + return (EINVAL); + + return (0); +} + diff --git a/sys/conf/files b/sys/conf/files index fac81b8..7faba40 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3454,6 +3454,8 @@ netpfil/ipfw/ip_fw_log.c optional inet ipfirewall netpfil/ipfw/ip_fw_pfil.c optional inet ipfirewall netpfil/ipfw/ip_fw_sockopt.c optional inet ipfirewall netpfil/ipfw/ip_fw_table.c optional inet ipfirewall +netpfil/ipfw/ip_fw_table_algo.c optional inet ipfirewall +netpfil/ipfw/ip_fw_iface.c optional inet ipfirewall netpfil/ipfw/ip_fw_nat.c optional inet ipfirewall_nat netpfil/pf/if_pflog.c optional pflog pf inet netpfil/pf/if_pfsync.c optional pfsync pf inet diff --git a/sys/modules/ipfw/Makefile b/sys/modules/ipfw/Makefile index 6920e6a..9a90c4a 100644 --- a/sys/modules/ipfw/Makefile +++ b/sys/modules/ipfw/Makefile @@ -1,11 +1,13 @@ # $FreeBSD$ +.include <src.opts.mk> + .PATH: ${.CURDIR}/../../netpfil/ipfw KMOD= ipfw SRCS= ip_fw2.c ip_fw_pfil.c SRCS+= ip_fw_dynamic.c ip_fw_log.c -SRCS+= ip_fw_sockopt.c ip_fw_table.c +SRCS+= ip_fw_sockopt.c ip_fw_table.c ip_fw_table_algo.c ip_fw_iface.c SRCS+= opt_inet.h opt_inet6.h opt_ipdivert.h opt_ipfw.h opt_ipsec.h CFLAGS+= -DIPFIREWALL @@ -18,4 +20,15 @@ CFLAGS+= -DIPFIREWALL #CFLAGS+= -DIPFIREWALL_DEFAULT_TO_ACCEPT # +.if !defined(KERNBUILDDIR) +.if ${MK_INET_SUPPORT} != "no" +opt_inet.h: + echo "#define INET 1" > ${.TARGET} +.endif +.if ${MK_INET6_SUPPORT} != "no" +opt_inet6.h: + echo "#define INET6 1" > ${.TARGET} +.endif +.endif + .include <bsd.kmod.mk> diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index dae8cc0..e5e1179 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -36,6 +36,9 @@ 
*/ #define IPFW_DEFAULT_RULE 65535 +#define RESVD_SET 31 /*set for default and persistent rules*/ +#define IPFW_MAX_SETS 32 /* Number of sets supported by ipfw*/ + /* * Default number of ipfw tables. */ @@ -44,17 +47,17 @@ /* * Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit - * argument between 1 and 65534. The value 0 is unused, the value - * 65535 (IP_FW_TABLEARG) is used to represent 'tablearg', i.e. the - * can be 1..65534, or 65535 to indicate the use of a 'tablearg' + * argument between 1 and 65534. The value 0 (IP_FW_TARG) is used + * to represent 'tablearg' value, e.g. indicate the use of a 'tablearg' * result of the most recent table() lookup. * Note that 16bit is only a historical limit, resulting from * the use of a 16-bit fields for that value. In reality, we can have - * 2^32 pipes, queues, tag values and so on, and use 0 as a tablearg. + * 2^32 pipes, queues, tag values and so on. */ #define IPFW_ARG_MIN 1 #define IPFW_ARG_MAX 65534 -#define IP_FW_TABLEARG 65535 /* XXX should use 0 */ +#define IP_FW_TABLEARG 65535 /* Compat value for old clients */ +#define IP_FW_TARG 0 /* Current tablearg value */ /* * Number of entries in the call stack of the call/return commands. 
@@ -65,15 +68,61 @@ /* IP_FW3 header/opcodes */ typedef struct _ip_fw3_opheader { uint16_t opcode; /* Operation opcode */ - uint16_t reserved[3]; /* Align to 64-bit boundary */ + uint16_t version; /* Opcode version */ + uint16_t reserved[2]; /* Align to 64-bit boundary */ } ip_fw3_opheader; /* IPFW extented tables support */ #define IP_FW_TABLE_XADD 86 /* add entry */ #define IP_FW_TABLE_XDEL 87 /* delete entry */ -#define IP_FW_TABLE_XGETSIZE 88 /* get table size */ +#define IP_FW_TABLE_XGETSIZE 88 /* get table size (deprecated) */ #define IP_FW_TABLE_XLIST 89 /* list table contents */ +#define IP_FW_TABLE_XDESTROY 90 /* destroy table */ +#define IP_FW_TABLES_XLIST 92 /* list all tables */ +#define IP_FW_TABLE_XINFO 93 /* request info for one table */ +#define IP_FW_TABLE_XFLUSH 94 /* flush table data */ +#define IP_FW_TABLE_XCREATE 95 /* create new table */ +#define IP_FW_TABLE_XMODIFY 96 /* modify existing table */ +#define IP_FW_XGET 97 /* Retrieve configuration */ +#define IP_FW_XADD 98 /* add rule */ +#define IP_FW_XDEL 99 /* del rule */ +#define IP_FW_XMOVE 100 /* move rules to different set */ +#define IP_FW_XZERO 101 /* clear accounting */ +#define IP_FW_XRESETLOG 102 /* zero rules logs */ +#define IP_FW_SET_SWAP 103 /* Swap between 2 sets */ +#define IP_FW_SET_MOVE 104 /* Move one set to another one */ +#define IP_FW_SET_ENABLE 105 /* Enable/disable sets */ +#define IP_FW_TABLE_XFIND 106 /* finds an entry */ +#define IP_FW_XIFLIST 107 /* list tracked interfaces */ +#define IP_FW_TABLES_ALIST 108 /* list table algorithms */ +#define IP_FW_TABLE_XSWAP 109 /* swap two tables */ + +/* + * Usage guidelines: + * + * IP_FW_TABLE_XLIST(ver 1): Dumps all table data + * Request(getsockopt): [ ipfw_obj_lheader ], size = ipfw_xtable_info.size + * Reply: [ ipfw_obj_lheader ipfw_xtable_info ipfw_table_xentry x N ] + * + * IP_FW_TABLE_XDESTROY: Destroys given table + * Request(setsockopt): [ ipfw_obj_header ] + * + * IP_FW_TABLES_XGETSIZE: Get buffer size needed to list 
info for all tables. + * Request(getsockopt): [ empty ], size = sizeof(ipfw_obj_lheader) + * Reply: [ ipfw_obj_lheader ] + * + * IP_FW_TABLES_XLIST: Lists all tables currently available in kernel. + * Request(getsockopt): [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size + * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ] + * + * IP_FW_TABLE_XINFO: Store table info to buffer. + * Request(getsockopt): [ ipfw_obj_header ipfw_xtable_info(empty)] + * Reply: [ ipfw_obj_header ipfw_xtable_info ] + * + * IP_FW_TABLE_XFLUSH: Removes all data from given table leaving type etc.. + * Request(setsockopt): [ ipfw_obj_header ] + */ /* * The kernel representation of ipfw rules is made of a list of @@ -220,6 +269,7 @@ enum ipfw_opcodes { /* arguments (4 byte each) */ O_DSCP, /* 2 u32 = DSCP mask */ O_SETDSCP, /* arg1=DSCP value */ + O_IP_FLOW_LOOKUP, /* arg1=table number, u32=value */ O_LAST_OPCODE /* not an opcode! */ }; @@ -341,6 +391,7 @@ typedef struct _ipfw_insn_if { union { struct in_addr ip; int glob; + uint16_t kidx; } p; char name[IFNAMSIZ]; } ipfw_insn_if; @@ -471,15 +522,17 @@ typedef struct _ipfw_insn_icmp6 { /* * Here we have the structure representing an ipfw rule. * - * It starts with a general area (with link fields and counters) - * followed by an array of one or more instructions, which the code - * accesses as an array of 32-bit values. - * - * Given a rule pointer r: + * Layout: + * struct ip_fw_rule + * [ counter block, size = rule->cntr_len ] + * [ one or more instructions, size = rule->cmd_len * 4 ] * - * r->cmd is the start of the first instruction. - * ACTION_PTR(r) is the start of the first action (things to do - * once a rule matched). + * It starts with a general area (with link fields). + * Counter block may be next (if rule->cntr_len > 0), + * followed by an array of one or more instructions, which the code + * accesses as an array of 32-bit values. 
rule->cmd_len represents + * the total instructions legth in u32 worrd, while act_ofs represents + * rule action offset in u32 words. * * When assembling instruction, remember the following: * @@ -490,11 +543,41 @@ typedef struct _ipfw_insn_icmp6 { * + if a rule has an "altq" option, it comes after "log" * + if a rule has an O_TAG option, it comes after "log" and "altq" * - * NOTE: we use a simple linked list of rules because we never need - * to delete a rule without scanning the list. We do not use - * queue(3) macros for portability and readability. + * + * All structures (excluding instructions) are u64-aligned. + * Please keep this. */ +struct ip_fw_rule { + uint16_t act_ofs; /* offset of action in 32-bit units */ + uint16_t cmd_len; /* # of 32-bit words in cmd */ + uint16_t spare; + uint8_t set; /* rule set (0..31) */ + uint8_t flags; /* rule flags */ + uint32_t rulenum; /* rule number */ + uint32_t id; /* rule id */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; +#define IPFW_RULE_NOOPT 0x01 /* Has no options in body */ + +/* Unaligned version */ + +/* Base ipfw rule counter block. 
*/ +struct ip_fw_bcounter { + uint16_t size; /* Size of counter block, bytes */ + uint8_t flags; /* flags for given block */ + uint8_t spare; + uint32_t timestamp; /* tv_sec of last match */ + uint64_t pcnt; /* Packet counter */ + uint64_t bcnt; /* Byte counter */ +}; + + +#ifndef _KERNEL +/* + * Legacy rule format + */ struct ip_fw { struct ip_fw *x_next; /* linked list of rules */ struct ip_fw *next_rule; /* ptr to next [skipto] rule */ @@ -503,8 +586,7 @@ struct ip_fw { uint16_t act_ofs; /* offset of action in 32-bit units */ uint16_t cmd_len; /* # of 32-bit words in cmd */ uint16_t rulenum; /* rule number */ - uint8_t set; /* rule set (0..31) */ -#define RESVD_SET 31 /* set for default and persistent rules */ + uint8_t set; /* rule set (0..31) */ uint8_t _pad; /* padding */ uint32_t id; /* rule id */ @@ -515,12 +597,13 @@ struct ip_fw { ipfw_insn cmd[1]; /* storage for commands */ }; +#endif #define ACTION_PTR(rule) \ (ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) ) -#define RULESIZE(rule) (sizeof(struct ip_fw) + \ - ((struct ip_fw *)(rule))->cmd_len * 4 - 4) +#define RULESIZE(rule) (sizeof(*(rule)) + (rule)->cmd_len * 4 - 4) + #if 1 // should be moved to in.h /* @@ -600,7 +683,16 @@ struct _ipfw_dyn_rule { #define IPFW_TABLE_CIDR 1 /* Table for holding IPv4/IPv6 prefixes */ #define IPFW_TABLE_INTERFACE 2 /* Table for holding interface names */ -#define IPFW_TABLE_MAXTYPE 2 /* Maximum valid number */ +#define IPFW_TABLE_NUMBER 3 /* Table for holding ports/uid/gid/etc */ +#define IPFW_TABLE_FLOW 4 /* Table for holding flow data */ +#define IPFW_TABLE_MAXTYPE 4 /* Maximum valid number */ + +/* Value types */ +#define IPFW_VTYPE_U32 1 /* Skipto/tablearg integer */ + +/* Value format types */ +#define IPFW_VFTYPE_U32 0 /* Skipto/tablearg integer */ +#define IPFW_VFTYPE_IP 1 /* Nexthop IP address */ typedef struct _ipfw_table_entry { in_addr_t addr; /* network address */ @@ -632,7 +724,7 @@ typedef struct _ipfw_table { } ipfw_table; typedef struct 
_ipfw_xtable { - ip_fw3_opheader opheader; /* eXtended tables are controlled via IP_FW3 */ + ip_fw3_opheader opheader; /* IP_FW3 opcode */ uint32_t size; /* size of entries in bytes */ uint32_t cnt; /* # of entries */ uint16_t tbl; /* table number */ @@ -640,4 +732,216 @@ typedef struct _ipfw_xtable { ipfw_table_xentry xent[0]; /* entries */ } ipfw_xtable; +typedef struct _ipfw_obj_tlv { + uint16_t type; /* TLV type */ + uint16_t flags; /* TLV-specific flags */ + uint32_t length; /* Total length, aligned to u64 */ +} ipfw_obj_tlv; +#define IPFW_TLV_TBL_NAME 1 +#define IPFW_TLV_TBLNAME_LIST 2 +#define IPFW_TLV_RULE_LIST 3 +#define IPFW_TLV_DYNSTATE_LIST 4 +#define IPFW_TLV_TBL_ENT 5 +#define IPFW_TLV_DYN_ENT 6 +#define IPFW_TLV_RULE_ENT 7 +#define IPFW_TLV_TBLENT_LIST 8 +#define IPFW_TLV_RANGE 9 + +/* Object name TLV */ +typedef struct _ipfw_obj_ntlv { + ipfw_obj_tlv head; /* TLV header */ + uint16_t idx; /* Name index */ + uint8_t spare; /* unused */ + uint8_t type; /* object type, if applicable */ + uint32_t set; /* set, if applicable */ + char name[64]; /* Null-terminated name */ +} ipfw_obj_ntlv; + +/* IPv4/IPv6 L4 flow description */ +struct tflow_entry { + uint8_t af; + uint8_t proto; + uint16_t spare; + uint16_t sport; + uint16_t dport; + union { + struct { + struct in_addr sip; + struct in_addr dip; + } a4; + struct { + struct in6_addr sip6; + struct in6_addr dip6; + } a6; + } a; +}; + +/* Table entry TLV */ +typedef struct _ipfw_obj_tentry { + ipfw_obj_tlv head; /* TLV header */ + uint8_t subtype; /* subtype (IPv4,IPv6) */ + uint8_t masklen; /* mask length */ + uint16_t idx; /* Table name index */ + uint32_t value; /* value */ + uint8_t result; /* request result */ + uint8_t spare0; + uint16_t spare1; + uint32_t spare2; + union { + /* Longest field needs to be aligned by 8-byte boundary */ + struct in_addr addr; /* IPv4 address */ + uint32_t key; /* uid/gid/port */ + struct in6_addr addr6; /* IPv6 address */ + char iface[IF_NAMESIZE]; /* interface name */ + 
struct tflow_entry flow; + } k; +} ipfw_obj_tentry; +#define IPFW_TF_UPDATE 0x01 /* Update record if exists */ +/* Container TLV */ +#define IPFW_CTF_ATOMIC 0x01 /* Perform atomic operation */ +/* Operation results */ +#define IPFW_TR_IGNORED 0 /* Entry was ignored (rollback) */ +#define IPFW_TR_ADDED 1 /* Entry was successfully added */ +#define IPFW_TR_UPDATED 2 /* Entry was successfully updated*/ +#define IPFW_TR_DELETED 3 /* Entry was successfully deleted*/ +#define IPFW_TR_LIMIT 4 /* Entry was ignored (limit) */ +#define IPFW_TR_NOTFOUND 5 /* Entry was not found */ +#define IPFW_TR_EXISTS 6 /* Entry already exists */ +#define IPFW_TR_ERROR 7 /* Request has failed (unknown) */ + +typedef struct _ipfw_obj_dyntlv { + ipfw_obj_tlv head; + ipfw_dyn_rule state; +} ipfw_obj_dyntlv; +#define IPFW_DF_LAST 0x01 /* Last state in chain */ + +/* Container TLVs */ +typedef struct _ipfw_obj_ctlv { + ipfw_obj_tlv head; /* TLV header */ + uint32_t count; /* Number of sub-TLVs */ + uint16_t objsize; /* Single object size */ + uint8_t version; /* TLV version */ + uint8_t flags; /* TLV-specific flags */ +} ipfw_obj_ctlv; + +/* Range TLV */ +typedef struct _ipfw_range_tlv { + ipfw_obj_tlv head; /* TLV header */ + uint32_t flags; /* Range flags */ + uint16_t start_rule; /* Range start */ + uint16_t end_rule; /* Range end */ + uint32_t set; /* Range set to match */ + uint32_t new_set; /* New set to move/swap to */ +} ipfw_range_tlv; +#define IPFW_RCFLAG_RANGE 0x01 /* rule range is set */ +#define IPFW_RCFLAG_ALL 0x02 /* match ALL rules */ +#define IPFW_RCFLAG_SET 0x04 /* match rules in given set */ + +typedef struct _ipfw_ta_tinfo { + uint32_t flags; /* Format flags */ + uint32_t spare; + uint8_t taclass4; /* algorithm class */ + uint8_t spare4; + uint16_t itemsize4; /* item size in runtime */ + uint32_t size4; /* runtime structure size */ + uint32_t count4; /* number of items in runtime */ + uint8_t taclass6; /* algorithm class */ + uint8_t spare6; + uint16_t itemsize6; /* item size
in runtime */ + uint32_t size6; /* runtime structure size */ + uint32_t count6; /* number of items in runtime */ +} ipfw_ta_tinfo; +#define IPFW_TACLASS_HASH 1 /* algo is based on hash */ +#define IPFW_TACLASS_ARRAY 2 /* algo is based on array */ +#define IPFW_TACLASS_RADIX 3 /* algo is based on radix tree */ + +#define IPFW_TATFLAGS_DATA 0x0001 /* Has data filled in */ +#define IPFW_TATFLAGS_AFDATA 0x0002 /* Separate data per AF */ +#define IPFW_TATFLAGS_AFITEM 0x0004 /* diff. items per AF */ + +typedef struct _ipfw_xtable_info { + uint8_t type; /* table type (cidr,iface,..) */ + uint8_t tflags; /* type flags */ + uint8_t vtype; /* value type (u32) */ + uint8_t vftype; /* value format type (ip,number)*/ + uint16_t mflags; /* modification flags */ + uint16_t flags; /* generic table flags */ + uint32_t set; /* set table is in */ + uint32_t kidx; /* kernel index */ + uint32_t refcnt; /* number of references */ + uint32_t count; /* Number of records */ + uint32_t size; /* Total size of records(export)*/ + uint32_t limit; /* Max number of records */ + char tablename[64]; /* table name */ + char algoname[64]; /* algorithm name */ + ipfw_ta_tinfo ta_info; /* additional algo stats */ +} ipfw_xtable_info; +/* Generic table flags */ +#define IPFW_TGFLAGS_LOCKED 0x01 /* Tables is locked from changes*/ +/* Table type-specific flags */ +#define IPFW_TFFLAG_SRCIP 0x01 +#define IPFW_TFFLAG_DSTIP 0x02 +#define IPFW_TFFLAG_SRCPORT 0x04 +#define IPFW_TFFLAG_DSTPORT 0x08 +#define IPFW_TFFLAG_PROTO 0x10 +/* Table modification flags */ +#define IPFW_TMFLAGS_FTYPE 0x0001 /* Change ftype field */ +#define IPFW_TMFLAGS_LIMIT 0x0002 /* Change limit value */ +#define IPFW_TMFLAGS_LOCK 0x0004 /* Change table lock state */ + +typedef struct _ipfw_iface_info { + char ifname[64]; /* interface name */ + uint32_t ifindex; /* interface index */ + uint32_t flags; /* flags */ + uint32_t refcnt; /* number of references */ + uint32_t gencnt; /* number of changes */ + uint64_t spare; +} 
ipfw_iface_info; +#define IPFW_IFFLAG_RESOLVED 0x01 /* Interface exists */ + +typedef struct _ipfw_ta_info { + char algoname[64]; /* algorithm name */ + uint32_t type; /* lookup type */ + uint32_t flags; + uint32_t refcnt; + uint32_t spare0; + uint64_t spare1; +} ipfw_ta_info; + +#define IPFW_OBJTYPE_TABLE 1 +typedef struct _ipfw_obj_header { + ip_fw3_opheader opheader; /* IP_FW3 opcode */ + uint32_t spare; + uint16_t idx; /* object name index */ + uint8_t objtype; /* object type */ + uint8_t objsubtype; /* object subtype */ + ipfw_obj_ntlv ntlv; /* object name tlv */ +} ipfw_obj_header; + +typedef struct _ipfw_obj_lheader { + ip_fw3_opheader opheader; /* IP_FW3 opcode */ + uint32_t set_mask; /* disabled set mask */ + uint32_t count; /* Total objects count */ + uint32_t size; /* Total size (incl. header) */ + uint32_t objsize; /* Size of one object */ +} ipfw_obj_lheader; + +#define IPFW_CFG_GET_STATIC 0x01 +#define IPFW_CFG_GET_STATES 0x02 +#define IPFW_CFG_GET_COUNTERS 0x04 +typedef struct _ipfw_cfg_lheader { + ip_fw3_opheader opheader; /* IP_FW3 opcode */ + uint32_t set_mask; /* enabled set mask */ + uint32_t spare; + uint32_t flags; /* Request flags */ + uint32_t size; /* needed buffer size */ + uint32_t start_rule; + uint32_t end_rule; +} ipfw_cfg_lheader; + +typedef struct _ipfw_range_header { + ip_fw3_opheader opheader; /* IP_FW3 opcode */ + ipfw_range_tlv range; +} ipfw_range_header; + #endif /* _IPFW2_H */ diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c index 9190091..90e77b0 100644 --- a/sys/netpfil/ipfw/ip_fw2.c +++ b/sys/netpfil/ipfw/ip_fw2.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/condvar.h> +#include <sys/counter.h> #include <sys/eventhandler.h> #include <sys/malloc.h> #include <sys/mbuf.h> @@ -121,9 +122,16 @@ VNET_DEFINE(int, autoinc_step); VNET_DEFINE(int, fw_one_pass) = 1; VNET_DEFINE(unsigned int, fw_tables_max); +VNET_DEFINE(unsigned int, fw_tables_sets) = 0; /* Don't
use set-aware tables */ /* Use 128 tables by default */ static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT; +static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, + int tablearg, int jump_backwards); +static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, + int tablearg, int jump_backwards); +#define JUMP(ch, f, num, targ, back) jump_fast(ch, f, num, targ, back) + /* * Each rule belongs to one of 32 different sets (0..31). * The variable set_disable contains one bit per set. @@ -156,6 +164,7 @@ ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; #ifdef SYSCTL_NODE uint32_t dummy_def = IPFW_DEFAULT_RULE; static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS); +static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS); SYSBEGIN(f3) @@ -177,7 +186,10 @@ SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, "The default/max possible rule number."); SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_max, CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU", - "Maximum number of tables"); + "Maximum number of concurrently used tables"); +SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets, + CTLTYPE_UINT|CTLFLAG_RW, 0, 0, sysctl_ipfw_tables_sets, "IU", + "Use per-set namespace for tables"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, &default_to_accept, 0, "Make the default rule accept all packets."); @@ -351,15 +363,18 @@ tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) } static int -iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, uint32_t *tablearg) +iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, + uint32_t *tablearg) { + if (ifp == NULL) /* no iface with this packet, match fails */ - return 0; + return (0); + /* Check by name or by IP address */ if (cmd->name[0] != '\0') { /* match by name */ if (cmd->name[0] == '\1') /* use tablearg to match */ - return ipfw_lookup_table_extended(chain, cmd->p.glob, - ifp->if_xname, tablearg, 
IPFW_TABLE_INTERFACE); + return ipfw_lookup_table_extended(chain, cmd->p.kidx, 0, + &ifp->if_index, tablearg); /* Check name */ if (cmd->p.glob) { if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) @@ -788,7 +803,7 @@ set_match(struct ip_fw_args *args, int slot, /* * Helper function to enable cached rule lookups using - * x_next and next_rule fields in ipfw rule. + * cached_id and cached_pos fields in ipfw rule. */ static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, @@ -796,22 +811,25 @@ jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, { int f_pos; - /* If possible use cached f_pos (in f->next_rule), - * whose version is written in f->next_rule + /* If possible use cached f_pos (in f->cached_pos), + * whose version is written in f->cached_id * (horrible hacks to avoid changing the ABI). */ - if (num != IP_FW_TABLEARG && (uintptr_t)f->x_next == chain->id) - f_pos = (uintptr_t)f->next_rule; + if (num != IP_FW_TARG && f->cached_id == chain->id) + f_pos = f->cached_pos; else { int i = IP_FW_ARG_TABLEARG(num); /* make sure we do not jump backward */ if (jump_backwards == 0 && i <= f->rulenum) i = f->rulenum + 1; - f_pos = ipfw_find_rule(chain, i, 0); + if (chain->idxmap != NULL) + f_pos = chain->idxmap[i]; + else + f_pos = ipfw_find_rule(chain, i, 0); /* update the cache */ - if (num != IP_FW_TABLEARG) { - f->next_rule = (void *)(uintptr_t)f_pos; - f->x_next = (void *)(uintptr_t)chain->id; + if (num != IP_FW_TARG) { + f->cached_id = chain->id; + f->cached_pos = f_pos; } } @@ -819,6 +837,24 @@ jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, } /* + * Helper function to enable real fast rule lookups. 
+ */ +static int +jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, + int tablearg, int jump_backwards) +{ + int f_pos; + + num = IP_FW_ARG_TABLEARG(num); + /* make sure we do not jump backward */ + if (jump_backwards == 0 && num <= f->rulenum) + num = f->rulenum + 1; + f_pos = chain->idxmap[num]; + + return (f_pos); +} + +/* * The main check routine for the firewall. * * All arguments are in args so we can modify them and return them @@ -1464,9 +1500,9 @@ do { \ proto != IPPROTO_UDP) break; else if (v == 2) - key = htonl(dst_port); + key = dst_port; else if (v == 3) - key = htonl(src_port); + key = src_port; #ifndef USERSPACE else if (v == 4 || v == 5) { check_uidgid( @@ -1485,7 +1521,6 @@ do { \ else if (v == 5 /* O_JAIL */) key = ucred_cache.xid; #endif /* !__FreeBSD__ */ - key = htonl(key); } else #endif /* !USERSPACE */ break; @@ -1504,8 +1539,9 @@ do { \ void *pkey = (cmd->opcode == O_IP_DST_LOOKUP) ? &args->f_id.dst_ip6: &args->f_id.src_ip6; match = ipfw_lookup_table_extended(chain, - cmd->arg1, pkey, &v, - IPFW_TABLE_CIDR); + cmd->arg1, + sizeof(struct in6_addr), + pkey, &v); if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) match = ((ipfw_insn_u32 *)cmd)->d[0] == v; if (match) @@ -1513,6 +1549,17 @@ do { \ } break; + case O_IP_FLOW_LOOKUP: + { + uint32_t v = 0; + match = ipfw_lookup_table_extended(chain, + cmd->arg1, 0, &args->f_id, &v); + if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) + match = ((ipfw_insn_u32 *)cmd)->d[0] == v; + if (match) + tablearg = v; + } + break; case O_IP_SRC_MASK: case O_IP_DST_MASK: if (is_ipv4) { @@ -2166,7 +2213,7 @@ do { \ case O_SKIPTO: IPFW_INC_RULE_COUNTER(f, pktlen); - f_pos = jump_fast(chain, f, cmd->arg1, tablearg, 0); + f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0); /* * Skip disabled rules, and re-enter * the inner loop with the correct @@ -2255,7 +2302,7 @@ do { \ if (IS_CALL) { stack[mtag->m_tag_id] = f->rulenum; mtag->m_tag_id++; - f_pos = jump_fast(chain, f, cmd->arg1, + f_pos = JUMP(chain, f, cmd->arg1, 
tablearg, 1); } else { /* `return' action */ mtag->m_tag_id--; @@ -2376,7 +2423,7 @@ do { \ uint32_t fib; IPFW_INC_RULE_COUNTER(f, pktlen); - fib = IP_FW_ARG_TABLEARG(cmd->arg1); + fib = IP_FW_ARG_TABLEARG(cmd->arg1) & 0x7FFFF; if (fib >= rt_numfibs) fib = 0; M_SETFIB(m, fib); @@ -2437,7 +2484,7 @@ do { \ retval = IP_FW_DENY; break; } - if (cmd->arg1 != IP_FW_TABLEARG) + if (cmd->arg1 != IP_FW_TARG) ((ipfw_insn_nat *)cmd)->nat = t; } retval = ipfw_nat_ptr(args, t, m); @@ -2546,7 +2593,27 @@ sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) return (ipfw_resize_tables(&V_layer3_chain, ntables)); } + +/* + * Switches table namespace between global and per-set. + */ +static int +sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int sets; + + sets = V_fw_tables_sets; + + error = sysctl_handle_int(oidp, &sets, 0, req); + /* Read operation or some error */ + if ((error != 0) || (req->newptr == NULL)) + return (error); + + return (ipfw_switch_tables_namespace(&V_layer3_chain, sets)); +} #endif + /* * Module and VNET glue */ @@ -2601,6 +2668,7 @@ ipfw_init(void) default_fw_tables = IPFW_TABLES_MAX; ipfw_log_bpf(1); /* init */ + ipfw_iface_init(); return (error); } @@ -2611,6 +2679,7 @@ static void ipfw_destroy(void) { + ipfw_iface_destroy(); ipfw_log_bpf(0); /* uninit */ printf("IP firewall unloaded\n"); } @@ -2641,12 +2710,11 @@ vnet_ipfw_init(const void *unused) LIST_INIT(&chain->nat); #endif + ipfw_init_counters(); /* insert the default rule and create the initial map */ chain->n_rules = 1; - chain->static_len = sizeof(struct ip_fw); chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO); - if (chain->map) - rule = malloc(chain->static_len, M_IPFW, M_WAITOK | M_ZERO); + rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); /* Set initial number of tables */ V_fw_tables_max = default_fw_tables; @@ -2667,9 +2735,12 @@ vnet_ipfw_init(const void *unused) rule->cmd[0].opcode = default_to_accept ? 
O_ACCEPT : O_DENY; chain->default_rule = chain->map[0] = rule; chain->id = rule->id = 1; + /* Pre-calculate rules length for legacy dump format */ + chain->static_len = sizeof(struct ip_fw_rule0); IPFW_LOCK_INIT(chain); ipfw_dyn_init(chain); + ipfw_init_skipto_cache(chain); /* First set up some values that are compile time options */ V_ipfw_vnet_ready = 1; /* Open for business */ @@ -2687,7 +2758,7 @@ vnet_ipfw_init(const void *unused) * In layer2 we have the same behaviour, except that V_ether_ipfw * is checked on each packet because there are no pfil hooks. */ - V_ip_fw_ctl_ptr = ipfw_ctl; + V_ip_fw_ctl_ptr = ipfw_ctl3; error = ipfw_attach_hooks(1); return (error); } @@ -2698,7 +2769,7 @@ vnet_ipfw_init(const void *unused) static int vnet_ipfw_uninit(const void *unused) { - struct ip_fw *reap, *rule; + struct ip_fw *reap; struct ip_fw_chain *chain = &V_layer3_chain; int i; @@ -2718,23 +2789,22 @@ vnet_ipfw_uninit(const void *unused) ipfw_dyn_uninit(0); /* run the callout_drain */ IPFW_WUNLOCK(chain); - ipfw_destroy_tables(chain); reap = NULL; IPFW_WLOCK(chain); - for (i = 0; i < chain->n_rules; i++) { - rule = chain->map[i]; - rule->x_next = reap; - reap = rule; - } - if (chain->map) - free(chain->map, M_IPFW); + for (i = 0; i < chain->n_rules; i++) + ipfw_reap_add(chain, &reap, chain->map[i]); + free(chain->map, M_IPFW); + ipfw_destroy_skipto_cache(chain); IPFW_WUNLOCK(chain); IPFW_UH_WUNLOCK(chain); + ipfw_destroy_tables(chain); if (reap != NULL) ipfw_reap_rules(reap); + vnet_ipfw_iface_destroy(chain); IPFW_LOCK_DESTROY(chain); ipfw_dyn_uninit(1); /* free the remaining parts */ - return 0; + ipfw_destroy_counters(); + return (0); } /* diff --git a/sys/netpfil/ipfw/ip_fw_dynamic.c b/sys/netpfil/ipfw/ip_fw_dynamic.c index af546f4..ba16634 100644 --- a/sys/netpfil/ipfw/ip_fw_dynamic.c +++ b/sys/netpfil/ipfw/ip_fw_dynamic.c @@ -196,8 +196,7 @@ static int ipfw_dyn_count; /* number of objects */ static int last_log; /* Log ratelimiting */ static void 
ipfw_dyn_tick(void *vnetx); -static void check_dyn_rules(struct ip_fw_chain *, struct ip_fw *, - int, int, int); +static void check_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *, int, int); #ifdef SYSCTL_NODE static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS); @@ -720,7 +719,7 @@ ipfw_install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, conn_limit = IP_FW_ARG_TABLEARG(cmd->conn_limit); DEB( - if (cmd->conn_limit == IP_FW_TABLEARG) + if (cmd->conn_limit == IP_FW_TARG) printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " "(tablearg)\n", __func__, conn_limit); else @@ -1008,7 +1007,7 @@ ipfw_dyn_tick(void * vnetx) check_ka = 1; } - check_dyn_rules(chain, NULL, RESVD_SET, check_ka, 1); + check_dyn_rules(chain, NULL, check_ka, 1); callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, vnetx, 0); @@ -1040,8 +1039,8 @@ ipfw_dyn_tick(void * vnetx) * are not freed by other instance (see stage 2, 3) */ static void -check_dyn_rules(struct ip_fw_chain *chain, struct ip_fw *rule, - int set, int check_ka, int timer) +check_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt, + int check_ka, int timer) { struct mbuf *m0, *m, *mnext, **mtailp; struct ip *h; @@ -1105,12 +1104,10 @@ check_dyn_rules(struct ip_fw_chain *chain, struct ip_fw *rule, /* * Remove rules which are: * 1) expired - * 2) created by given rule - * 3) created by any rule in given set + * 2) matches deletion range */ if ((TIME_LEQ(q->expire, time_uptime)) || - ((rule != NULL) && (q->rule == rule)) || - ((set != RESVD_SET) && (q->rule->set == set))) { + (rt != NULL && ipfw_match_range(q->rule, rt))) { if (TIME_LE(time_uptime, q->expire) && q->dyn_type == O_KEEP_STATE && V_dyn_keep_states != 0) { @@ -1324,8 +1321,7 @@ check_dyn_rules(struct ip_fw_chain *chain, struct ip_fw *rule, * Deletes all dynamic rules originated by given rule or all rules in * given set. Specify RESVD_SET to indicate set should not be used. 
* @chain - pointer to current ipfw rules chain - * @rule - delete all states originated by given rule if != NULL - * @set - delete all states originated by any rule in set @set if != RESVD_SET + * @rt - delete all states originated by rules in matched range. * * Function has to be called with IPFW_UH_WLOCK held. * Additionally, function assume that dynamic rule/set is @@ -1333,10 +1329,39 @@ check_dyn_rules(struct ip_fw_chain *chain, struct ip_fw *rule, * 'deleted' rules. */ void -ipfw_expire_dyn_rules(struct ip_fw_chain *chain, struct ip_fw *rule, int set) +ipfw_expire_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt) { - check_dyn_rules(chain, rule, set, 0, 0); + check_dyn_rules(chain, rt, 0, 0); +} + +/* + * Check if rule contains at least one dynamic opcode. + * + * Returns 1 if such opcode is found, 0 otherwise. + */ +int +ipfw_is_dyn_rule(struct ip_fw *rule) +{ + int cmdlen, l; + ipfw_insn *cmd; + + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + switch (cmd->opcode) { + case O_LIMIT: + case O_KEEP_STATE: + case O_PROBE_STATE: + case O_CHECK_STATE: + return (1); + } + } + + return (0); } void @@ -1444,7 +1469,7 @@ sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS) #endif /* - * Returns number of dynamic rules. + * Returns size of dynamic states in legacy format */ int ipfw_dyn_len(void) @@ -1455,7 +1480,92 @@ ipfw_dyn_len(void) } /* - * Fill given buffer with dynamic states. + * Returns number of dynamic states. + * Used by dump format v1 (current). + */ +int +ipfw_dyn_get_count(void) +{ + + return (V_ipfw_dyn_v == NULL) ? 0 : DYN_COUNT; +} + +static void +export_dyn_rule(ipfw_dyn_rule *src, ipfw_dyn_rule *dst) +{ + + memcpy(dst, src, sizeof(*src)); + memcpy(&(dst->rule), &(src->rule->rulenum), sizeof(src->rule->rulenum)); + /* + * store set number into high word of + * dst->rule pointer.
+ */ + memcpy((char *)&dst->rule + sizeof(src->rule->rulenum), + &(src->rule->set), sizeof(src->rule->set)); + /* + * store a non-null value in "next". + * The userland code will interpret a + * NULL here as a marker + * for the last dynamic rule. + */ + memcpy(&dst->next, &dst, sizeof(dst)); + dst->expire = + TIME_LEQ(dst->expire, time_uptime) ? 0 : dst->expire - time_uptime; +} + +/* + * Fills the buffer given by @sd with dynamic states. + * Used by dump format v1 (current). + * + * Returns 0 on success. + */ +int +ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd) +{ + ipfw_dyn_rule *p; + ipfw_obj_dyntlv *dst, *last; + ipfw_obj_ctlv *ctlv; + int i; + size_t sz; + + if (V_ipfw_dyn_v == NULL) + return (0); + + IPFW_UH_RLOCK_ASSERT(chain); + + ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); + if (ctlv == NULL) + return (ENOMEM); + sz = sizeof(ipfw_obj_dyntlv); + ctlv->head.type = IPFW_TLV_DYNSTATE_LIST; + ctlv->objsize = sz; + last = NULL; + + for (i = 0 ; i < V_curr_dyn_buckets; i++) { + IPFW_BUCK_LOCK(i); + for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) { + dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd, sz); + if (dst == NULL) { + IPFW_BUCK_UNLOCK(i); + return (ENOMEM); + } + + export_dyn_rule(p, &dst->state); + dst->head.length = sz; + dst->head.type = IPFW_TLV_DYN_ENT; + last = dst; + } + IPFW_BUCK_UNLOCK(i); + } + + if (last != NULL) /* mark last dynamic rule */ + last->head.flags = IPFW_DF_LAST; + + return (0); +} + +/* + * Fill given buffer with dynamic states (legacy format). * IPFW_UH_RLOCK has to be held while calling. */ void @@ -1477,28 +1587,9 @@ ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep) if (bp + sizeof *p <= ep) { ipfw_dyn_rule *dst = (ipfw_dyn_rule *)bp; - bcopy(p, dst, sizeof *p); - bcopy(&(p->rule->rulenum), &(dst->rule), - sizeof(p->rule->rulenum)); - /* - * store set number into high word of - * dst->rule pointer.
- */ - bcopy(&(p->rule->set), - (char *)&dst->rule + - sizeof(p->rule->rulenum), - sizeof(p->rule->set)); - /* - * store a non-null value in "next". - * The userland code will interpret a - * NULL here as a marker - * for the last dynamic rule. - */ - bcopy(&dst, &dst->next, sizeof(dst)); + + export_dyn_rule(p, dst); last = dst; - dst->expire = - TIME_LEQ(dst->expire, time_uptime) ? - 0 : dst->expire - time_uptime ; bp += sizeof(ipfw_dyn_rule); } } diff --git a/sys/netpfil/ipfw/ip_fw_iface.c b/sys/netpfil/ipfw/ip_fw_iface.c new file mode 100644 index 0000000..e9b61ce --- /dev/null +++ b/sys/netpfil/ipfw/ip_fw_iface.c @@ -0,0 +1,526 @@ +/*- + * Copyright (c) 2014 Yandex LLC. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: projects/ipfw/sys/netpfil/ipfw/ip_fw_iface.c 267384 2014-06-12 09:59:11Z melifaro $"); + +/* + * Kernel interface tracking API. + * + */ + +#include "opt_ipfw.h" +#include "opt_inet.h" +#ifndef INET +#error IPFIREWALL requires INET. +#endif /* INET */ +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <sys/eventhandler.h> +#include <net/if.h> +#include <net/if_var.h> +#include <net/vnet.h> + +#include <netinet/in.h> +#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ +#include <netinet/ip_fw.h> + +#include <netpfil/ipfw/ip_fw_private.h> + +#define CHAIN_TO_II(ch) ((struct namedobj_instance *)ch->ifcfg) + +#define DEFAULT_IFACES 128 + +static void handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif, + uint16_t ifindex); +static void handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif, + uint16_t ifindex); + +/* + * FreeBSD Kernel interface. + */ +static void ipfw_kifhandler(void *arg, struct ifnet *ifp); +static int ipfw_kiflookup(char *name); +static void iface_khandler_register(void); +static void iface_khandler_deregister(void); + +static eventhandler_tag ipfw_ifdetach_event, ipfw_ifattach_event; +static int num_vnets = 0; +struct mtx vnet_mtx; + +/* + * Checks if kernel interface is contained in our tracked + * interface list and calls attach/detach handler. 
+ */ +static void +ipfw_kifhandler(void *arg, struct ifnet *ifp) +{ + struct ip_fw_chain *ch; + struct ipfw_iface *iif; + struct namedobj_instance *ii; + uintptr_t htype; + + ch = &V_layer3_chain; + htype = (uintptr_t)arg; + + if (ch == NULL) + return; + + IPFW_UH_WLOCK(ch); + ii = CHAIN_TO_II(ch); + if (ii == NULL) { + IPFW_UH_WUNLOCK(ch); + return; + } + iif = (struct ipfw_iface*)ipfw_objhash_lookup_name(ii, 0,ifp->if_xname); + if (iif != NULL) { + if (htype == 1) + handle_ifattach(ch, iif, ifp->if_index); + else + handle_ifdetach(ch, iif, ifp->if_index); + } + IPFW_UH_WUNLOCK(ch); +} + +/* + * Reference current VNET as iface tracking API user. + * Registers interface tracking handlers for first VNET. + */ +static void +iface_khandler_register() +{ + int create; + + create = 0; + + mtx_lock(&vnet_mtx); + if (num_vnets == 0) + create = 1; + num_vnets++; + mtx_unlock(&vnet_mtx); + + if (create == 0) + return; + + printf("IPFW: starting up interface tracker\n"); + + ipfw_ifdetach_event = EVENTHANDLER_REGISTER( + ifnet_departure_event, ipfw_kifhandler, NULL, + EVENTHANDLER_PRI_ANY); + ipfw_ifattach_event = EVENTHANDLER_REGISTER( + ifnet_arrival_event, ipfw_kifhandler, (void*)((uintptr_t)1), + EVENTHANDLER_PRI_ANY); +} + +/* + * + * Detach interface event handlers on last VNET instance + * detach. + */ +static void +iface_khandler_deregister() +{ + int destroy; + + destroy = 0; + mtx_lock(&vnet_mtx); + if (--num_vnets == 0) + destroy = 1; + mtx_unlock(&vnet_mtx); + + if (destroy == 0) + return; + + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, + ipfw_ifattach_event); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, + ipfw_ifdetach_event); +} + +/* + * Retrieves ifindex for given @name. + * + * Returns ifindex or 0. + */ +static int +ipfw_kiflookup(char *name) +{ + struct ifnet *ifp; + int ifindex; + + ifindex = 0; + + if ((ifp = ifunit_ref(name)) != NULL) { + ifindex = ifp->if_index; + if_rele(ifp); + } + + return (ifindex); +} + +/* + * Global ipfw startup hook. 
+ * Since we perform lazy initialization, do nothing except + * mutex init. + */ +int +ipfw_iface_init() +{ + + mtx_init(&vnet_mtx, "IPFW ifhandler mtx", NULL, MTX_DEF); + return (0); +} + +/* + * Global ipfw destroy hook. + * Unregister khandlers iff init has been done. + */ +void +ipfw_iface_destroy() +{ + + mtx_destroy(&vnet_mtx); +} + +/* + * Perform actual init on internal request. + * Inits both namehash and global khandler. + */ +static void +vnet_ipfw_iface_init(struct ip_fw_chain *ch) +{ + struct namedobj_instance *ii; + + ii = ipfw_objhash_create(DEFAULT_IFACES); + IPFW_UH_WLOCK(ch); + if (ch->ifcfg == NULL) { + ch->ifcfg = ii; + ii = NULL; + } + IPFW_UH_WUNLOCK(ch); + + if (ii != NULL) { + /* Already initialized. Free namehash. */ + ipfw_objhash_destroy(ii); + } else { + /* We're the first ones. Init kernel hooks. */ + iface_khandler_register(); + } +} + +static void +destroy_iface(struct namedobj_instance *ii, struct named_object *no, + void *arg) +{ + struct ipfw_iface *iif; + struct ip_fw_chain *ch; + + ch = (struct ip_fw_chain *)arg; + iif = (struct ipfw_iface *)no; + + /* Assume all consumers have been already detached */ + free(iif, M_IPFW); +} + +/* + * Per-VNET ipfw detach hook. + * + */ +void +vnet_ipfw_iface_destroy(struct ip_fw_chain *ch) +{ + struct namedobj_instance *ii; + + IPFW_UH_WLOCK(ch); + ii = CHAIN_TO_II(ch); + ch->ifcfg = NULL; + IPFW_UH_WUNLOCK(ch); + + if (ii != NULL) { + ipfw_objhash_foreach(ii, destroy_iface, ch); + ipfw_objhash_destroy(ii); + iface_khandler_deregister(); + } +} + +/* + * Notify the subsystem that we are interested in tracking + * interface @name. This function has to be called without + * holding any locks to permit allocating the necessary states + * for proper interface tracking. + * + * Returns 0 on success. 
+ */ +int +ipfw_iface_ref(struct ip_fw_chain *ch, char *name, + struct ipfw_ifc *ic) +{ + struct namedobj_instance *ii; + struct ipfw_iface *iif, *tmp; + + if (strlen(name) >= sizeof(iif->ifname)) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + + ii = CHAIN_TO_II(ch); + if (ii == NULL) { + + /* + * First request to subsystem. + * Let's perform init. + */ + IPFW_UH_WUNLOCK(ch); + vnet_ipfw_iface_init(ch); + IPFW_UH_WLOCK(ch); + ii = CHAIN_TO_II(ch); + } + + iif = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); + + if (iif != NULL) { + iif->no.refcnt++; + ic->iface = iif; + IPFW_UH_WUNLOCK(ch); + return (0); + } + + IPFW_UH_WUNLOCK(ch); + + /* Not found. Let's create one */ + iif = malloc(sizeof(struct ipfw_iface), M_IPFW, M_WAITOK | M_ZERO); + TAILQ_INIT(&iif->consumers); + iif->no.name = iif->ifname; + strlcpy(iif->ifname, name, sizeof(iif->ifname)); + + /* + * Ref & link to the list. + * + * We assume ifnet_arrival_event / ifnet_departure_event + * are not holding any locks. + */ + iif->no.refcnt = 1; + IPFW_UH_WLOCK(ch); + + tmp = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); + if (tmp != NULL) { + /* Interface has been created since unlock. Ref and return */ + tmp->no.refcnt++; + ic->iface = tmp; + IPFW_UH_WUNLOCK(ch); + free(iif, M_IPFW); + return (0); + } + + iif->ifindex = ipfw_kiflookup(name); + if (iif->ifindex != 0) + iif->resolved = 1; + + ipfw_objhash_add(ii, &iif->no); + ic->iface = iif; + + IPFW_UH_WUNLOCK(ch); + + return (0); +} + +/* + * Adds @ic to the list of iif interface consumers. + * Must be called with holding both UH+WLOCK. + * Callback may be immediately called (if interface exists). 
+ */ +void +ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic) +{ + struct ipfw_iface *iif; + + IPFW_UH_WLOCK_ASSERT(ch); + IPFW_WLOCK_ASSERT(ch); + + iif = ic->iface; + + TAILQ_INSERT_TAIL(&iif->consumers, ic, next); + if (iif->resolved != 0) + ic->cb(ch, ic->cbdata, iif->ifindex); +} + +/* + * Unlinks interface tracker object @ic from interface. + * Must be called while holding UH lock. + */ +void +ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic) +{ + struct ipfw_iface *iif; + + IPFW_UH_WLOCK_ASSERT(ch); + + iif = ic->iface; + if (ic->linked != 0) + TAILQ_REMOVE(&iif->consumers, ic, next); +} + +/* + * Unreference interface specified by @ic. + * Must be called without holding any locks. + */ +void +ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic) +{ + struct ipfw_iface *iif; + + iif = ic->iface; + ic->iface = NULL; + + IPFW_UH_WLOCK(ch); + iif->no.refcnt--; + /* TODO: check for references & delete */ + IPFW_UH_WUNLOCK(ch); +} + +/* + * Interface arrival handler. + */ +static void +handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif, + uint16_t ifindex) +{ + struct ipfw_ifc *ic; + + IPFW_UH_WLOCK_ASSERT(ch); + + iif->gencnt++; + iif->resolved = 1; + iif->ifindex = ifindex; + + IPFW_WLOCK(ch); + TAILQ_FOREACH(ic, &iif->consumers, next) + ic->cb(ch, ic->cbdata, iif->ifindex); + IPFW_WUNLOCK(ch); +} + +/* + * Interface departure handler.
+ */ +static void +handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif, + uint16_t ifindex) +{ + struct ipfw_ifc *ic; + + IPFW_UH_WLOCK_ASSERT(ch); + + IPFW_WLOCK(ch); + TAILQ_FOREACH(ic, &iif->consumers, next) + ic->cb(ch, ic->cbdata, 0); + IPFW_WUNLOCK(ch); + + iif->gencnt++; + iif->resolved = 0; + iif->ifindex = 0; +} + +struct dump_iface_args { + struct ip_fw_chain *ch; + struct sockopt_data *sd; +}; + +static void +export_iface_internal(struct namedobj_instance *ii, struct named_object *no, + void *arg) +{ + ipfw_iface_info *i; + struct dump_iface_args *da; + struct ipfw_iface *iif; + + da = (struct dump_iface_args *)arg; + + i = (ipfw_iface_info *)ipfw_get_sopt_space(da->sd, sizeof(*i)); + KASSERT(i != 0, ("previously checked buffer is not enough")); + + iif = (struct ipfw_iface *)no; + + strlcpy(i->ifname, iif->ifname, sizeof(i->ifname)); + if (iif->resolved) + i->flags |= IPFW_IFFLAG_RESOLVED; + i->ifindex = iif->ifindex; + i->refcnt = iif->no.refcnt; + i->gencnt = iif->gencnt; +} + +/* + * Lists all interface currently tracked by ipfw. 
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
+ * Reply: [ ipfw_obj_lheader ipfw_iface_info x N ]
+ *
+ * Returns 0 on success, EINVAL if the request header cannot be obtained
+ * or claims more data than was supplied, ENOMEM if the buffer is too
+ * small (the header then carries the required size).
+ */
+int
+ipfw_list_ifaces(struct ip_fw_chain *ch, struct sockopt_data *sd)
+{
+	struct _ipfw_obj_lheader *olh;
+	struct dump_iface_args da;
+	uint32_t count, size;
+
+	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
+	if (olh == NULL)
+		return (EINVAL);
+	/* Header must not advertise more bytes than the caller passed in */
+	if (sd->valsize < olh->size)
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(ch);
+	count = ipfw_objhash_count(CHAIN_TO_II(ch));
+	size = count * sizeof(ipfw_iface_info) + sizeof(ipfw_obj_lheader);
+
+	/* Fill in header regardless of buffer size */
+	olh->count = count;
+	olh->objsize = sizeof(ipfw_iface_info);
+
+	/* Buffer too small: report the needed size and bail out */
+	if (size > olh->size) {
+		olh->size = size;
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+	olh->size = size;
+
+	da.ch = ch;
+	da.sd = sd;
+
+	/* Emit one ipfw_iface_info per tracked interface */
+	ipfw_objhash_foreach(CHAIN_TO_II(ch), export_iface_internal, &da);
+	IPFW_UH_RUNLOCK(ch);
+
+	return (0);
+}
+
+
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index e4a2f31..8eb1785 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -176,7 +176,9 @@ enum { /* result for matching dynamic rules */
  * Eventually we may implement it with a callback on the function.
*/ struct ip_fw_chain; -void ipfw_expire_dyn_rules(struct ip_fw_chain *, struct ip_fw *, int); +struct sockopt_data; +int ipfw_is_dyn_rule(struct ip_fw *rule); +void ipfw_expire_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *); void ipfw_dyn_unlock(ipfw_dyn_rule *q); struct tcphdr; @@ -188,10 +190,12 @@ ipfw_dyn_rule *ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp); void ipfw_remove_dyn_children(struct ip_fw *rule); void ipfw_get_dynamic(struct ip_fw_chain *chain, char **bp, const char *ep); +int ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd); void ipfw_dyn_init(struct ip_fw_chain *); /* per-vnet initialization */ void ipfw_dyn_uninit(int); /* per-vnet deinitialization */ int ipfw_dyn_len(void); +int ipfw_dyn_get_count(void); /* common variables */ VNET_DECLARE(int, fw_one_pass); @@ -212,23 +216,67 @@ VNET_DECLARE(int, autoinc_step); VNET_DECLARE(unsigned int, fw_tables_max); #define V_fw_tables_max VNET(fw_tables_max) +VNET_DECLARE(unsigned int, fw_tables_sets); +#define V_fw_tables_sets VNET(fw_tables_sets) + +struct tables_config; + +#ifdef _KERNEL +typedef struct ip_fw_cntr { + uint64_t pcnt; /* Packet counter */ + uint64_t bcnt; /* Byte counter */ + uint64_t timestamp; /* tv_sec of last match */ +} ip_fw_cntr; + +/* + * Here we have the structure representing an ipfw rule. + * + * It starts with a general area + * followed by an array of one or more instructions, which the code + * accesses as an array of 32-bit values. + * + * Given a rule pointer r: + * + * r->cmd is the start of the first instruction. + * ACTION_PTR(r) is the start of the first action (things to do + * once a rule matched). 
+ */ + +struct ip_fw { + uint16_t act_ofs; /* offset of action in 32-bit units */ + uint16_t cmd_len; /* # of 32-bit words in cmd */ + uint16_t rulenum; /* rule number */ + uint8_t set; /* rule set (0..31) */ + uint8_t flags; /* currently unused */ + counter_u64_t cntr; /* Pointer to rule counters */ + uint32_t timestamp; /* tv_sec of last match */ + uint32_t id; /* rule id */ + uint32_t cached_id; /* used by jump_fast */ + uint32_t cached_pos; /* used by jump_fast */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +#endif + struct ip_fw_chain { struct ip_fw **map; /* array of rule ptrs to ease lookup */ uint32_t id; /* ruleset id */ int n_rules; /* number of static rules */ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ - struct radix_node_head **tables; /* IPv4 tables */ - struct radix_node_head **xtables; /* extended tables */ - uint8_t *tabletype; /* Array of table types */ + void *tablestate; /* runtime table info */ + int *idxmap; /* skipto array of rules */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t rwmtx; #else struct rwlock rwmtx; #endif - int static_len; /* total len of static rules */ + int static_len; /* total len of static rules (v0) */ uint32_t gencnt; /* NAT generation count */ - struct ip_fw *reap; /* list of rules to reap */ struct ip_fw *default_rule; + struct tables_config *tblcfg; /* tables module data */ + void *ifcfg; /* interface module data */ + int *idxmap_back; /* standby skipto array of rules */ #if defined( __linux__ ) || defined( _WIN32 ) spinlock_t uh_lock; #else @@ -236,9 +284,59 @@ struct ip_fw_chain { #endif }; +struct namedobj_instance; + +struct named_object { + TAILQ_ENTRY(named_object) nn_next; /* namehash */ + TAILQ_ENTRY(named_object) nv_next; /* valuehash */ + char *name; /* object name */ + uint8_t type; /* object type */ + uint8_t compat; /* Object name is number */ + uint16_t kidx; /* object kernel index */ + uint16_t uidx; /* userland idx for compat records */ + uint32_t set; /* set 
object belongs to */ + uint32_t refcnt; /* number of references */ +}; +TAILQ_HEAD(namedobjects_head, named_object); + struct sockopt; /* used by tcp_var.h */ +struct sockopt_data { + caddr_t kbuf; /* allocated buffer */ + size_t ksize; /* given buffer size */ + size_t koff; /* data already used */ + size_t kavail; /* number of bytes available */ + size_t ktotal; /* total bytes pushed */ + struct sockopt *sopt; /* socket data */ + caddr_t sopt_val; /* sopt user buffer */ + size_t valsize; /* original data size */ +}; + +struct ipfw_ifc; + +typedef void (ipfw_ifc_cb)(struct ip_fw_chain *ch, void *cbdata, + uint16_t ifindex); + +struct ipfw_iface { + struct named_object no; + char ifname[64]; + int resolved; + uint16_t ifindex; + uint16_t spare; + uint64_t gencnt; + TAILQ_HEAD(, ipfw_ifc) consumers; +}; + +struct ipfw_ifc { + TAILQ_ENTRY(ipfw_ifc) next; + struct ipfw_iface *iface; + ipfw_ifc_cb *cb; + void *cbdata; + int linked; + int spare; +}; /* Macro for working with various counters */ +#ifdef USERSPACE #define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \ (_cntr)->pcnt++; \ (_cntr)->bcnt += _bytes; \ @@ -260,8 +358,33 @@ struct sockopt; /* used by tcp_var.h */ (_cntr)->pcnt = 0; \ (_cntr)->bcnt = 0; \ } while (0) +#else +#define IPFW_INC_RULE_COUNTER(_cntr, _bytes) do { \ + counter_u64_add((_cntr)->cntr, 1); \ + counter_u64_add((_cntr)->cntr + 1, _bytes); \ + if ((_cntr)->timestamp != time_uptime) \ + (_cntr)->timestamp = time_uptime; \ + } while (0) + +#define IPFW_INC_DYN_COUNTER(_cntr, _bytes) do { \ + (_cntr)->pcnt++; \ + (_cntr)->bcnt += _bytes; \ + } while (0) + +#define IPFW_ZERO_RULE_COUNTER(_cntr) do { \ + counter_u64_zero((_cntr)->cntr); \ + counter_u64_zero((_cntr)->cntr + 1); \ + (_cntr)->timestamp = 0; \ + } while (0) + +#define IPFW_ZERO_DYN_COUNTER(_cntr) do { \ + (_cntr)->pcnt = 0; \ + (_cntr)->bcnt = 0; \ + } while (0) +#endif + -#define IP_FW_ARG_TABLEARG(a) (((a) == IP_FW_TABLEARG) ? 
tablearg : (a)) +#define IP_FW_ARG_TABLEARG(a) (((a) == IP_FW_TARG) ? tablearg : (a)) /* * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c * so the variable and the macros must be here. @@ -295,32 +418,140 @@ struct sockopt; /* used by tcp_var.h */ #define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock) #define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock) +struct obj_idx { + uint16_t uidx; /* internal index supplied by userland */ + uint16_t kidx; /* kernel object index */ + uint16_t off; /* tlv offset from rule end in 4-byte words */ + uint8_t spare; + uint8_t type; /* object type within its category */ +}; + +struct rule_check_info { + uint16_t flags; /* rule-specific check flags */ + uint16_t table_opcodes; /* count of opcodes referencing table */ + uint16_t urule_numoff; /* offset of rulenum in bytes */ + uint8_t version; /* rule version */ + uint8_t spare; + ipfw_obj_ctlv *ctlv; /* name TLV containter */ + struct ip_fw *krule; /* resulting rule pointer */ + caddr_t urule; /* original rule pointer */ + struct obj_idx obuf[8]; /* table references storage */ +}; + +/* Legacy interface support */ +/* + * FreeBSD 8 export rule format + */ +struct ip_fw_rule0 { + struct ip_fw *x_next; /* linked list of rules */ + struct ip_fw *next_rule; /* ptr to next [skipto] rule */ + /* 'next_rule' is used to pass up 'set_disable' status */ + + uint16_t act_ofs; /* offset of action in 32-bit units */ + uint16_t cmd_len; /* # of 32-bit words in cmd */ + uint16_t rulenum; /* rule number */ + uint8_t set; /* rule set (0..31) */ + uint8_t _pad; /* padding */ + uint32_t id; /* rule id */ + + /* These fields are present in all rules. 
*/ + uint64_t pcnt; /* Packet counter */ + uint64_t bcnt; /* Byte counter */ + uint32_t timestamp; /* tv_sec of last match */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +struct ip_fw_bcounter0 { + uint64_t pcnt; /* Packet counter */ + uint64_t bcnt; /* Byte counter */ + uint32_t timestamp; /* tv_sec of last match */ +}; + +/* Kernel rule length */ +/* + * RULE _K_ SIZE _V_ -> + * get kernel size from userland rool version _V_. + * RULE _U_ SIZE _V_ -> + * get user size version _V_ from kernel rule + * RULESIZE _V_ -> + * get user size rule length + */ +/* FreeBSD8 <> current kernel format */ +#define RULEUSIZE0(r) (sizeof(struct ip_fw_rule0) + (r)->cmd_len * 4 - 4) +#define RULEKSIZE0(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) +/* FreeBSD11 <> current kernel format */ +#define RULEUSIZE1(r) (roundup2(sizeof(struct ip_fw_rule) + \ + (r)->cmd_len * 4 - 4, 8)) +#define RULEKSIZE1(r) roundup2((sizeof(struct ip_fw) + (r)->cmd_len*4 - 4), 8) + + +/* In ip_fw_iface.c */ +int ipfw_iface_init(void); +void ipfw_iface_destroy(void); +void vnet_ipfw_iface_destroy(struct ip_fw_chain *ch); +int ipfw_iface_ref(struct ip_fw_chain *ch, char *name, + struct ipfw_ifc *ic); +void ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic); +void ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); +void ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic); +int ipfw_list_ifaces(struct ip_fw_chain *ch, struct sockopt_data *sd); + /* In ip_fw_sockopt.c */ +void ipfw_init_skipto_cache(struct ip_fw_chain *chain); +void ipfw_destroy_skipto_cache(struct ip_fw_chain *chain); int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id); -int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule); -int ipfw_ctl(struct sockopt *sopt); +int ipfw_ctl3(struct sockopt *sopt); int ipfw_chk(struct ip_fw_args *args); +void ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, + struct ip_fw *rule); void 
ipfw_reap_rules(struct ip_fw *head); +void ipfw_init_counters(void); +void ipfw_destroy_counters(void); +struct ip_fw *ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize); +int ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt); + +caddr_t ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed); +caddr_t ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed); + +typedef void (objhash_cb_t)(struct namedobj_instance *ni, struct named_object *, + void *arg); +struct namedobj_instance *ipfw_objhash_create(uint32_t items); +void ipfw_objhash_destroy(struct namedobj_instance *); +void ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks); +void ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, + void **idx, int *blocks); +void ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, + void **idx, int *blocks); +void ipfw_objhash_bitmap_free(void *idx, int blocks); +struct named_object *ipfw_objhash_lookup_name(struct namedobj_instance *ni, + uint32_t set, char *name); +struct named_object *ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, + uint16_t idx); +int ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, + struct named_object *b); +void ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no); +void ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no); +uint32_t ipfw_objhash_count(struct namedobj_instance *ni); +void ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, + void *arg); +int ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx); +int ipfw_objhash_alloc_idx(void *n, uint16_t *pidx); /* In ip_fw_table.c */ -struct radix_node; +struct table_info; + +typedef int (table_lookup_t)(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val); + int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val); -int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - 
uint32_t *val, int type); +int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, + void *paddr, uint32_t *val); int ipfw_init_tables(struct ip_fw_chain *ch); -void ipfw_destroy_tables(struct ip_fw_chain *ch); -int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl); -int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value); -int ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type); -int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); -int ipfw_dump_table_entry(struct radix_node *rn, void *arg); -int ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl); -int ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); -int ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl); int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables); +int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets); +void ipfw_destroy_tables(struct ip_fw_chain *ch); /* In ip_fw_nat.c -- XXX to be moved to ip_var.h */ diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c index 3c342f7..5a7f896 100644 --- a/sys/netpfil/ipfw/ip_fw_sockopt.c +++ b/sys/netpfil/ipfw/ip_fw_sockopt.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include <sys/socketvar.h> #include <sys/sysctl.h> #include <sys/syslog.h> +#include <sys/fnv_hash.h> #include <net/if.h> #include <net/route.h> #include <net/vnet.h> @@ -62,17 +63,116 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip_fw.h> #include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_table.h> #ifdef MAC #include <security/mac/mac_framework.h> #endif +static int ipfw_ctl(struct sockopt *sopt); +static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, + struct rule_check_info *ci); +static int check_ipfw_rule1(struct ip_fw_rule *rule, int size, + struct rule_check_info *ci); +static 
int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, + struct rule_check_info *ci); + +#define NAMEDOBJ_HASH_SIZE 32 + +struct namedobj_instance { + struct namedobjects_head *names; + struct namedobjects_head *values; + uint32_t nn_size; /* names hash size */ + uint32_t nv_size; /* number hash size */ + u_long *idx_mask; /* used items bitmask */ + uint32_t max_blocks; /* number of "long" blocks in bitmask */ + uint32_t count; /* number of items */ + uint16_t free_off[IPFW_MAX_SETS]; /* first possible free offset */ +}; +#define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */ + +static uint32_t objhash_hash_name(struct namedobj_instance *ni, uint32_t set, + char *name); +static uint32_t objhash_hash_val(struct namedobj_instance *ni, uint32_t val); + +static int ipfw_flush_sopt_data(struct sockopt_data *sd); + MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); /* - * static variables followed by global ones (none in this file) + * static variables followed by global ones */ +#ifndef USERSPACE + +static VNET_DEFINE(uma_zone_t, ipfw_cntr_zone); +#define V_ipfw_cntr_zone VNET(ipfw_cntr_zone) + +void +ipfw_init_counters() +{ + + V_ipfw_cntr_zone = uma_zcreate("IPFW counters", + sizeof(ip_fw_cntr), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_PCPU); +} + +void +ipfw_destroy_counters() +{ + + uma_zdestroy(V_ipfw_cntr_zone); +} + +struct ip_fw * +ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize) +{ + struct ip_fw *rule; + + rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); + rule->cntr = uma_zalloc(V_ipfw_cntr_zone, M_WAITOK | M_ZERO); + + return (rule); +} + +static void +free_rule(struct ip_fw *rule) +{ + + uma_zfree(V_ipfw_cntr_zone, rule->cntr); + free(rule, M_IPFW); +} +#else +void +ipfw_init_counters() +{ +} + +void +ipfw_destroy_counters() +{ +} + +struct ip_fw * +ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize) +{ + struct ip_fw *rule; + + rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); + + return 
(rule); +} + +static void +free_rule(struct ip_fw *rule) +{ + + free(rule, M_IPFW); +} + +#endif + + /* * Find the smallest rule >= key, id. * We could use bsearch but it is so simple that we code it directly @@ -99,6 +199,104 @@ ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id) } /* + * Builds skipto cache on rule set @map. + */ +static void +update_skipto_cache(struct ip_fw_chain *chain, struct ip_fw **map) +{ + int *smap, rulenum; + int i, mi; + + IPFW_UH_WLOCK_ASSERT(chain); + + mi = 0; + rulenum = map[mi]->rulenum; + smap = chain->idxmap_back; + + if (smap == NULL) + return; + + for (i = 0; i < 65536; i++) { + smap[i] = mi; + /* Use the same rule index until i < rulenum */ + if (i != rulenum || i == 65535) + continue; + /* Find next rule with num > i */ + rulenum = map[++mi]->rulenum; + while (rulenum == i) + rulenum = map[++mi]->rulenum; + } +} + +/* + * Swaps prepared (backup) index with current one. + */ +static void +swap_skipto_cache(struct ip_fw_chain *chain) +{ + int *map; + + IPFW_UH_WLOCK_ASSERT(chain); + IPFW_WLOCK_ASSERT(chain); + + map = chain->idxmap; + chain->idxmap = chain->idxmap_back; + chain->idxmap_back = map; +} + +/* + * Allocate and initialize skipto cache. 
+ */
+void
+ipfw_init_skipto_cache(struct ip_fw_chain *chain)
+{
+	int *idxmap, *idxmap_back;
+
+	/*
+	 * One int slot per possible rule number (0..65535).
+	 * Both arrays are allocated before taking any lock since
+	 * M_WAITOK may sleep.
+	 */
+	idxmap = malloc(65536 * sizeof(*idxmap), M_IPFW,
+	    M_WAITOK | M_ZERO);
+	idxmap_back = malloc(65536 * sizeof(*idxmap_back), M_IPFW,
+	    M_WAITOK | M_ZERO);
+
+	/*
+	 * Note we may be called at any time after initialization,
+	 * for example, on first skipto rule, so we need to
+	 * provide valid chain->idxmap on return
+	 */
+
+	IPFW_UH_WLOCK(chain);
+	if (chain->idxmap != NULL) {
+		/* Cache is already set up: drop our allocations */
+		IPFW_UH_WUNLOCK(chain);
+		free(idxmap, M_IPFW);
+		free(idxmap_back, M_IPFW);
+		return;
+	}
+
+	/* Set backup pointer first to permit building cache */
+	chain->idxmap_back = idxmap_back;
+	update_skipto_cache(chain, chain->map);
+	IPFW_WLOCK(chain);
+	/* It is now safe to set chain->idxmap ptr */
+	chain->idxmap = idxmap;
+	/* The freshly built backup array becomes the active one */
+	swap_skipto_cache(chain);
+	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+}
+
+/*
+ * Destroys skipto cache.
+ * Frees both the active and the standby index arrays, if allocated.
+ */
+void
+ipfw_destroy_skipto_cache(struct ip_fw_chain *chain)
+{
+
+	if (chain->idxmap != NULL)
+		free(chain->idxmap, M_IPFW);
+	/* Check the standby array itself, not the active one again */
+	if (chain->idxmap_back != NULL)
+		free(chain->idxmap_back, M_IPFW);
+}
+
+
+/*
  * allocate a new map, returns the chain locked. extra is the number
  * of entries to add or delete.
  */
@@ -108,11 +306,12 @@ get_map(struct ip_fw_chain *chain, int extra, int locked)
 
 	for (;;) {
 		struct ip_fw **map;
-		int i;
+		int i, mflags;
+
+		mflags = M_ZERO | ((locked != 0) ? M_NOWAIT : M_WAITOK);
 
 		i = chain->n_rules + extra;
-		map = malloc(i * sizeof(struct ip_fw *), M_IPFW,
-		    locked ? 
M_NOWAIT : M_WAITOK); + map = malloc(i * sizeof(struct ip_fw *), M_IPFW, mflags); if (map == NULL) { printf("%s: cannot allocate map\n", __FUNCTION__); return NULL; @@ -141,67 +340,390 @@ swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len) chain->n_rules = new_len; old_map = chain->map; chain->map = new_map; + swap_skipto_cache(chain); IPFW_WUNLOCK(chain); return old_map; } + +static void +export_cntr1_base(struct ip_fw *krule, struct ip_fw_bcounter *cntr) +{ + + cntr->size = sizeof(*cntr); + + if (krule->cntr != NULL) { + cntr->pcnt = counter_u64_fetch(krule->cntr); + cntr->bcnt = counter_u64_fetch(krule->cntr + 1); + cntr->timestamp = krule->timestamp; + } + if (cntr->timestamp > 0) + cntr->timestamp += boottime.tv_sec; +} + +static void +export_cntr0_base(struct ip_fw *krule, struct ip_fw_bcounter0 *cntr) +{ + + if (krule->cntr != NULL) { + cntr->pcnt = counter_u64_fetch(krule->cntr); + cntr->bcnt = counter_u64_fetch(krule->cntr + 1); + cntr->timestamp = krule->timestamp; + } + if (cntr->timestamp > 0) + cntr->timestamp += boottime.tv_sec; +} + /* - * Add a new rule to the list. Copy the rule into a malloc'ed area, then - * possibly create a rule number and add the rule to the list. + * Copies rule @urule from v1 userland format (current). + * to kernel @krule. + * Assume @krule is zeroed. + */ +static void +import_rule1(struct rule_check_info *ci) +{ + struct ip_fw_rule *urule; + struct ip_fw *krule; + + urule = (struct ip_fw_rule *)ci->urule; + krule = (struct ip_fw *)ci->krule; + + /* copy header */ + krule->act_ofs = urule->act_ofs; + krule->cmd_len = urule->cmd_len; + krule->rulenum = urule->rulenum; + krule->set = urule->set; + krule->flags = urule->flags; + + /* Save rulenum offset */ + ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum); + + /* Copy opcodes */ + memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); +} + +/* + * Export rule into v1 format (Current). 
+ * Layout: + * [ ipfw_obj_tlv(IPFW_TLV_RULE_ENT) + * [ ip_fw_rule ] OR + * [ ip_fw_bcounter ip_fw_rule] (depends on rcntrs). + * ] + * Assume @data is zeroed. + */ +static void +export_rule1(struct ip_fw *krule, caddr_t data, int len, int rcntrs) +{ + struct ip_fw_bcounter *cntr; + struct ip_fw_rule *urule; + ipfw_obj_tlv *tlv; + + /* Fill in TLV header */ + tlv = (ipfw_obj_tlv *)data; + tlv->type = IPFW_TLV_RULE_ENT; + tlv->length = len; + + if (rcntrs != 0) { + /* Copy counters */ + cntr = (struct ip_fw_bcounter *)(tlv + 1); + urule = (struct ip_fw_rule *)(cntr + 1); + export_cntr1_base(krule, cntr); + } else + urule = (struct ip_fw_rule *)(tlv + 1); + + /* copy header */ + urule->act_ofs = krule->act_ofs; + urule->cmd_len = krule->cmd_len; + urule->rulenum = krule->rulenum; + urule->set = krule->set; + urule->flags = krule->flags; + urule->id = krule->id; + + /* Copy opcodes */ + memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); +} + + +/* + * Copies rule @urule from FreeBSD8 userland format (v0) + * to kernel @krule. + * Assume @krule is zeroed. + */ +static void +import_rule0(struct rule_check_info *ci) +{ + struct ip_fw_rule0 *urule; + struct ip_fw *krule; + int cmdlen, l; + ipfw_insn *cmd; + ipfw_insn_limit *lcmd; + ipfw_insn_if *cmdif; + + urule = (struct ip_fw_rule0 *)ci->urule; + krule = (struct ip_fw *)ci->krule; + + /* copy header */ + krule->act_ofs = urule->act_ofs; + krule->cmd_len = urule->cmd_len; + krule->rulenum = urule->rulenum; + krule->set = urule->set; + if ((urule->_pad & 1) != 0) + krule->flags |= IPFW_RULE_NOOPT; + + /* Save rulenum offset */ + ci->urule_numoff = offsetof(struct ip_fw_rule0, rulenum); + + /* Copy opcodes */ + memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); + + /* + * Alter opcodes: + * 1) convert tablearg value from 65335 to 0 + * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room for targ). 
+ * 3) convert table number in iface opcodes to u16 + */ + l = krule->cmd_len; + cmd = krule->cmd; + cmdlen = 0; + + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + switch (cmd->opcode) { + /* Opcodes supporting tablearg */ + case O_TAG: + case O_TAGGED: + case O_PIPE: + case O_QUEUE: + case O_DIVERT: + case O_TEE: + case O_SKIPTO: + case O_CALLRETURN: + case O_NETGRAPH: + case O_NGTEE: + case O_NAT: + if (cmd->arg1 == 65535) + cmd->arg1 = IP_FW_TARG; + break; + case O_SETFIB: + case O_SETDSCP: + if (cmd->arg1 == 65535) + cmd->arg1 = IP_FW_TARG; + else + cmd->arg1 |= 0x8000; + break; + case O_LIMIT: + lcmd = (ipfw_insn_limit *)cmd; + if (lcmd->conn_limit == 65535) + lcmd->conn_limit = IP_FW_TARG; + break; + /* Interface tables */ + case O_XMIT: + case O_RECV: + case O_VIA: + /* Interface table, possibly */ + cmdif = (ipfw_insn_if *)cmd; + if (cmdif->name[0] != '\1') + break; + + cmdif->p.kidx = (uint16_t)cmdif->p.glob; + break; + } + } +} + +/* + * Copies rule @krule from kernel to FreeBSD8 userland format (v0) + */ +static void +export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) +{ + int cmdlen, l; + ipfw_insn *cmd; + ipfw_insn_limit *lcmd; + ipfw_insn_if *cmdif; + + /* copy header */ + memset(urule, 0, len); + urule->act_ofs = krule->act_ofs; + urule->cmd_len = krule->cmd_len; + urule->rulenum = krule->rulenum; + urule->set = krule->set; + if ((krule->flags & IPFW_RULE_NOOPT) != 0) + urule->_pad |= 1; + + /* Copy opcodes */ + memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); + + /* Export counters */ + export_cntr0_base(krule, (struct ip_fw_bcounter0 *)&urule->pcnt); + + /* + * Alter opcodes: + * 1) convert tablearg value from 0 to 65335 + * 2) Remove highest bit from O_SETFIB/O_SETDSCP values. 
+ * 3) convert table number in iface opcodes to int + */ + l = urule->cmd_len; + cmd = urule->cmd; + cmdlen = 0; + + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + switch (cmd->opcode) { + /* Opcodes supporting tablearg */ + case O_TAG: + case O_TAGGED: + case O_PIPE: + case O_QUEUE: + case O_DIVERT: + case O_TEE: + case O_SKIPTO: + case O_CALLRETURN: + case O_NETGRAPH: + case O_NGTEE: + case O_NAT: + if (cmd->arg1 == IP_FW_TARG) + cmd->arg1 = 65535; + break; + case O_SETFIB: + case O_SETDSCP: + if (cmd->arg1 == IP_FW_TARG) + cmd->arg1 = 65535; + else + cmd->arg1 &= ~0x8000; + break; + case O_LIMIT: + lcmd = (ipfw_insn_limit *)cmd; + if (lcmd->conn_limit == IP_FW_TARG) + lcmd->conn_limit = 65535; + break; + /* Interface tables */ + case O_XMIT: + case O_RECV: + case O_VIA: + /* Interface table, possibly */ + cmdif = (ipfw_insn_if *)cmd; + if (cmdif->name[0] != '\1') + break; + + cmdif->p.glob = cmdif->p.kidx; + break; + } + } +} + +/* + * Add new rule(s) to the list possibly creating rule number for each. * Update the rule_number in the input struct so the caller knows it as well. - * XXX DO NOT USE FOR THE DEFAULT RULE. * Must be called without IPFW_UH held */ -int -ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) +static int +commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count) { - struct ip_fw *rule; - int i, l, insert_before; + int error, i, insert_before, tcount; + uint16_t rulenum, *pnum; + struct rule_check_info *ci; + struct ip_fw *krule; struct ip_fw **map; /* the new array of pointers */ - if (chain->map == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE - 1) - return (EINVAL); + /* Check if we need to do table remap */ + tcount = 0; + for (ci = rci, i = 0; i < count; ci++, i++) { + if (ci->table_opcodes == 0) + continue; + + /* + * Rule has some table opcodes. 
+ * Reference & allocate needed tables/ + */ + error = ipfw_rewrite_table_uidx(chain, ci); + if (error != 0) { + + /* + * rewrite failed, state for current rule + * has been reverted. Check if we need to + * revert more. + */ + if (tcount > 0) { + + /* + * We have some more table rules + * we need to rollback. + */ + + IPFW_UH_WLOCK(chain); + while (ci != rci) { + ci--; + if (ci->table_opcodes == 0) + continue; + ipfw_unref_rule_tables(chain,ci->krule); + + } + IPFW_UH_WUNLOCK(chain); + + } + + return (error); + } + + tcount++; + } - l = RULESIZE(input_rule); - rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO); /* get_map returns with IPFW_UH_WLOCK if successful */ - map = get_map(chain, 1, 0 /* not locked */); + map = get_map(chain, count, 0 /* not locked */); if (map == NULL) { - free(rule, M_IPFW); - return ENOSPC; - } + if (tcount > 0) { + /* Unbind tables */ + IPFW_UH_WLOCK(chain); + for (ci = rci, i = 0; i < count; ci++, i++) { + if (ci->table_opcodes == 0) + continue; + + ipfw_unref_rule_tables(chain, ci->krule); + } + IPFW_UH_WUNLOCK(chain); + } - bcopy(input_rule, rule, l); - /* clear fields not settable from userland */ - rule->x_next = NULL; - rule->next_rule = NULL; - IPFW_ZERO_RULE_COUNTER(rule); + return (ENOSPC); + } if (V_autoinc_step < 1) V_autoinc_step = 1; else if (V_autoinc_step > 1000) V_autoinc_step = 1000; + + /* FIXME: Handle count > 1 */ + ci = rci; + krule = ci->krule; + rulenum = krule->rulenum; + /* find the insertion point, we will insert before */ - insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE; + insert_before = rulenum ? 
rulenum + 1 : IPFW_DEFAULT_RULE; i = ipfw_find_rule(chain, insert_before, 0); /* duplicate first part */ if (i > 0) bcopy(chain->map, map, i * sizeof(struct ip_fw *)); - map[i] = rule; + map[i] = krule; /* duplicate remaining part, we always have the default rule */ bcopy(chain->map + i, map + i + 1, sizeof(struct ip_fw *) *(chain->n_rules - i)); - if (rule->rulenum == 0) { - /* write back the number */ - rule->rulenum = i > 0 ? map[i-1]->rulenum : 0; - if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) - rule->rulenum += V_autoinc_step; - input_rule->rulenum = rule->rulenum; + if (rulenum == 0) { + /* Compute rule number and write it back */ + rulenum = i > 0 ? map[i-1]->rulenum : 0; + if (rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) + rulenum += V_autoinc_step; + krule->rulenum = rulenum; + /* Save number to userland rule */ + pnum = (uint16_t *)((caddr_t)ci->urule + ci->urule_numoff); + *pnum = rulenum; } - rule->id = chain->id + 1; + krule->id = chain->id + 1; + update_skipto_cache(chain, map); map = swap_map(chain, map, chain->n_rules + 1); - chain->static_len += l; + chain->static_len += RULEUSIZE0(krule); IPFW_UH_WUNLOCK(chain); if (map) free(map, M_IPFW); @@ -209,6 +731,23 @@ ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) } /* + * Adds @rule to the list of rules to reap + */ +void +ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, + struct ip_fw *rule) +{ + + IPFW_UH_WLOCK_ASSERT(chain); + + /* Unlink rule from everywhere */ + ipfw_unref_rule_tables(chain, rule); + + *((struct ip_fw **)rule) = *head; + *head = rule; +} + +/* * Reclaim storage associated with a list of rules. This is * typically the list created using remove_rule. * A NULL pointer on input is handled correctly. 
@@ -219,22 +758,12 @@ ipfw_reap_rules(struct ip_fw *head) struct ip_fw *rule; while ((rule = head) != NULL) { - head = head->x_next; - free(rule, M_IPFW); + head = *((struct ip_fw **)head); + free_rule(rule); } } /* - * Used by del_entry() to check if a rule should be kept. - * Returns 1 if the rule must be kept, 0 otherwise. - * - * Called with cmd = {0,1,5}. - * cmd == 0 matches on rule numbers, excludes rules in RESVD_SET if n == 0 ; - * cmd == 1 matches on set numbers only, rule numbers are ignored; - * cmd == 5 matches on rule and set numbers. - * - * n == 0 is a wildcard for rule numbers, there is no wildcard for sets. - * * Rules to keep are * (default || reserved || !match_set || !match_number) * where @@ -251,14 +780,384 @@ ipfw_reap_rules(struct ip_fw *head) * // number is ignored for cmd == 1 or n == 0 * */ +int +ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt) +{ + + /* Don't match default rule regardless of query */ + if (rule->rulenum == IPFW_DEFAULT_RULE) + return (0); + + /* Don't match rules in reserved set for flush requests */ + if ((rt->flags & IPFW_RCFLAG_ALL) != 0 && rule->set == RESVD_SET) + return (0); + + /* If we're filtering by set, don't match other sets */ + if ((rt->flags & IPFW_RCFLAG_SET) != 0 && rule->set != rt->set) + return (0); + + if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && + (rule->rulenum < rt->start_rule || rule->rulenum > rt->end_rule)) + return (0); + + return (1); +} + +/* + * Delete rules matching range @rt. + * Saves number of deleted rules in @ndel. + * + * Returns 0 on success. + */ +static int +delete_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int *ndel) +{ + struct ip_fw *reap, *rule, **map; + int end, start; + int i, n, ndyn, ofs; + + reap = NULL; + IPFW_UH_WLOCK(chain); /* arbitrate writers */ + + /* + * Stage 1: Determine range to inspect. + * Range is half-inclusive, e.g [start, end). 
+ */ + start = 0; + end = chain->n_rules - 1; + + if ((rt->flags & IPFW_RCFLAG_RANGE) != 0) { + start = ipfw_find_rule(chain, rt->start_rule, 0); + + end = ipfw_find_rule(chain, rt->end_rule, 0); + if (rt->end_rule != IPFW_DEFAULT_RULE) + while (chain->map[end]->rulenum == rt->end_rule) + end++; + } + + /* Allocate new map of the same size */ + map = get_map(chain, 0, 1 /* locked */); + if (map == NULL) { + IPFW_UH_WUNLOCK(chain); + return (ENOMEM); + } + + n = 0; + ndyn = 0; + ofs = start; + /* 1. bcopy the initial part of the map */ + if (start > 0) + bcopy(chain->map, map, start * sizeof(struct ip_fw *)); + /* 2. copy active rules between start and end */ + for (i = start; i < end; i++) { + rule = chain->map[i]; + if (ipfw_match_range(rule, rt) == 0) { + map[ofs++] = rule; + continue; + } + + n++; + if (ipfw_is_dyn_rule(rule) != 0) + ndyn++; + } + /* 3. copy the final part of the map */ + bcopy(chain->map + end, map + ofs, + (chain->n_rules - end) * sizeof(struct ip_fw *)); + /* 4. recalculate skipto cache */ + update_skipto_cache(chain, map); + /* 5. swap the maps (under UH_WLOCK + WHLOCK) */ + map = swap_map(chain, map, chain->n_rules - n); + /* 6. Remove all dynamic states originated by deleted rules */ + if (ndyn > 0) + ipfw_expire_dyn_rules(chain, rt); + /* 7. now remove the rules deleted from the old map */ + for (i = start; i < end; i++) { + rule = map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + chain->static_len -= RULEUSIZE0(rule); + ipfw_reap_add(chain, &reap, rule); + } + IPFW_UH_WUNLOCK(chain); + + ipfw_reap_rules(reap); + if (map != NULL) + free(map, M_IPFW); + *ndel = n; + return (0); +} + +/* + * Changes set of rules matching range @rt + * to rt->new_set. + * + * Returns 0 on success. + */ +static int +move_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt) +{ + struct ip_fw *rule; + int i; + + IPFW_UH_WLOCK(chain); + + /* + * Move rules with matching parameters to a new set. + * This one is much more complex. 
We have to ensure + * that all referenced tables (if any) are referenced + * by given rule subset only. Otherwise, we can't move + * them to new set and have to return error. + */ + if (V_fw_tables_sets != 0) { + if (ipfw_move_tables_sets(chain, rt, rt->new_set) != 0) { + IPFW_UH_WUNLOCK(chain); + return (EBUSY); + } + } + + /* XXX: We have to do swap holding WLOCK */ + for (i = 0; i < chain->n_rules - 1; i++) { + rule = chain->map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + rule->set = rt->new_set; + } + + IPFW_UH_WUNLOCK(chain); + + return (0); +} + +/* + * Clear counters for a specific rule. + * Normally run under IPFW_UH_RLOCK, but these are idempotent ops + * so we only care that rules do not disappear. + */ +static void +clear_counters(struct ip_fw *rule, int log_only) +{ + ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); + + if (log_only == 0) + IPFW_ZERO_RULE_COUNTER(rule); + if (l->o.opcode == O_LOG) + l->log_left = l->max_log; +} + +/* + * Flushes rules counters and/or log values on matching range. + * + * Returns number of items cleared. + */ +static int +clear_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int log_only) +{ + struct ip_fw *rule; + int num; + int i; + + num = 0; + + IPFW_UH_WLOCK(chain); /* arbitrate writers */ + for (i = 0; i < chain->n_rules - 1; i++) { + rule = chain->map[i]; + if (ipfw_match_range(rule, rt) == 0) + continue; + clear_counters(rule, log_only); + num++; + } + IPFW_UH_WUNLOCK(chain); + + return (num); +} + +static int +check_range_tlv(ipfw_range_tlv *rt) +{ + + if (rt->head.length != sizeof(*rt)) + return (1); + if (rt->start_rule > rt->end_rule) + return (1); + if (rt->set >= IPFW_MAX_SETS || rt->new_set >= IPFW_MAX_SETS) + return (1); + + return (0); +} + +/* + * Delete rules matching specified parameters + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_range_tlv ] + * Reply: [ ipfw_obj_header ipfw_range_tlv ] + * + * Saves number of deleted rules in ipfw_range_tlv->new_set. 
+ * + * Returns 0 on success. + */ static int -keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n) +del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - return - (rule->rulenum == IPFW_DEFAULT_RULE) || - (cmd == 0 && n == 0 && rule->set == RESVD_SET) || - !(cmd == 0 || rule->set == set) || - !(cmd == 1 || n == 0 || n == rule->rulenum); + ipfw_range_header *rh; + int error, ndel; + + if (sd->valsize != sizeof(*rh)) + return (EINVAL); + + rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); + + if (check_range_tlv(&rh->range) != 0) + return (EINVAL); + + ndel = 0; + if ((error = delete_range(chain, &rh->range, &ndel)) != 0) + return (error); + + /* Save number of rules deleted */ + rh->range.new_set = ndel; + return (0); +} + +/* + * Move rules/sets matching specified parameters + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_range_tlv ] + * + * Returns 0 on success. + */ +static int +move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_range_header *rh; + + if (sd->valsize != sizeof(*rh)) + return (EINVAL); + + rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); + + if (check_range_tlv(&rh->range) != 0) + return (EINVAL); + + return (move_range(chain, &rh->range)); +} + +/* + * Clear rule accounting data matching specified parameters + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_range_tlv ] + * Reply: [ ipfw_obj_header ipfw_range_tlv ] + * + * Saves number of cleared rules in ipfw_range_tlv->new_set. + * + * Returns 0 on success. 
+ */ +static int +clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_range_header *rh; + int log_only, num; + char *msg; + + if (sd->valsize != sizeof(*rh)) + return (EINVAL); + + rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); + + if (check_range_tlv(&rh->range) != 0) + return (EINVAL); + + log_only = (op3->opcode == IP_FW_XRESETLOG); + + num = clear_range(chain, &rh->range, log_only); + + if (rh->range.flags & IPFW_RCFLAG_ALL) + msg = log_only ? "All logging counts reset" : + "Accounting cleared"; + else + msg = log_only ? "logging count reset" : "cleared"; + + if (V_fw_verbose) { + int lev = LOG_SECURITY | LOG_NOTICE; + log(lev, "ipfw: %s.\n", msg); + } + + /* Save number of rules cleared */ + rh->range.new_set = num; + return (0); +} + +static void +enable_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt) +{ + uint32_t v_set; + + IPFW_UH_WLOCK_ASSERT(chain); + + /* Change enabled/disabled sets mask */ + v_set = (V_set_disable | rt->set) & ~rt->new_set; + v_set &= ~(1 << RESVD_SET); /* set RESVD_SET always enabled */ + IPFW_WLOCK(chain); + V_set_disable = v_set; + IPFW_WUNLOCK(chain); +} + +static void +swap_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int mv) +{ + struct ip_fw *rule; + int i; + + IPFW_UH_WLOCK_ASSERT(chain); + + /* Swap or move two sets */ + for (i = 0; i < chain->n_rules - 1; i++) { + rule = chain->map[i]; + if (rule->set == rt->set) + rule->set = rt->new_set; + else if (rule->set == rt->new_set && mv == 0) + rule->set = rt->set; + } + if (V_fw_tables_sets != 0) + ipfw_swap_tables_sets(chain, rt->set, rt->new_set, mv); +} + +/* + * Swaps or moves set + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_range_tlv ] + * + * Returns 0 on success. 
+ */ +static int +manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_range_header *rh; + + if (sd->valsize != sizeof(*rh)) + return (EINVAL); + + rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); + + if (rh->range.head.length != sizeof(ipfw_range_tlv)) + return (1); + + IPFW_UH_WLOCK(chain); + switch (op3->opcode) { + case IP_FW_SET_SWAP: + case IP_FW_SET_MOVE: + swap_sets(chain, &rh->range, op3->opcode == IP_FW_SET_MOVE); + break; + case IP_FW_SET_ENABLE: + enable_sets(chain, &rh->range); + break; + } + IPFW_UH_WUNLOCK(chain); + + return (0); } /** @@ -278,12 +1177,11 @@ keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n) static int del_entry(struct ip_fw_chain *chain, uint32_t arg) { - struct ip_fw *rule; uint32_t num; /* rule number or old_set */ uint8_t cmd, new_set; - int start, end, i, ofs, n; - struct ip_fw **map = NULL; + int do_del, ndel; int error = 0; + ipfw_range_tlv rt; num = arg & 0xffff; cmd = (arg >> 24) & 0xff; @@ -299,149 +1197,60 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg) return EINVAL; } - IPFW_UH_WLOCK(chain); /* arbitrate writers */ - chain->reap = NULL; /* prepare for deletions */ + /* Convert old requests into new representation */ + memset(&rt, 0, sizeof(rt)); + rt.start_rule = num; + rt.end_rule = num; + rt.set = num; + rt.new_set = new_set; + do_del = 0; switch (cmd) { - case 0: /* delete rules "num" (num == 0 matches all) */ - case 1: /* delete all rules in set N */ - case 5: /* delete rules with number N and set "new_set". */ - - /* - * Locate first rule to delete (start), the rule after - * the last one to delete (end), and count how many - * rules to delete (n). Always use keep_rule() to - * determine which rules to keep. - */ - n = 0; - if (cmd == 1) { - /* look for a specific set including RESVD_SET. - * Must scan the entire range, ignore num. 
- */ - new_set = num; - for (start = -1, end = i = 0; i < chain->n_rules; i++) { - if (keep_rule(chain->map[i], cmd, new_set, 0)) - continue; - if (start < 0) - start = i; - end = i; - n++; - } - end++; /* first non-matching */ - } else { - /* Optimized search on rule numbers */ - start = ipfw_find_rule(chain, num, 0); - for (end = start; end < chain->n_rules; end++) { - rule = chain->map[end]; - if (num > 0 && rule->rulenum != num) - break; - if (!keep_rule(rule, cmd, new_set, num)) - n++; - } - } - - if (n == 0) { - /* A flush request (arg == 0 or cmd == 1) on empty - * ruleset returns with no error. On the contrary, - * if there is no match on a specific request, - * we return EINVAL. - */ - if (arg != 0 && cmd != 1) - error = EINVAL; - break; - } - - /* We have something to delete. Allocate the new map */ - map = get_map(chain, -n, 1 /* locked */); - if (map == NULL) { - error = EINVAL; - break; - } - - /* 1. bcopy the initial part of the map */ - if (start > 0) - bcopy(chain->map, map, start * sizeof(struct ip_fw *)); - /* 2. copy active rules between start and end */ - for (i = ofs = start; i < end; i++) { - rule = chain->map[i]; - if (keep_rule(rule, cmd, new_set, num)) - map[ofs++] = rule; - } - /* 3. copy the final part of the map */ - bcopy(chain->map + end, map + ofs, - (chain->n_rules - end) * sizeof(struct ip_fw *)); - /* 4. swap the maps (under BH_LOCK) */ - map = swap_map(chain, map, chain->n_rules - n); - /* 5. 
now remove the rules deleted from the old map */ - if (cmd == 1) - ipfw_expire_dyn_rules(chain, NULL, new_set); - for (i = start; i < end; i++) { - rule = map[i]; - if (keep_rule(rule, cmd, new_set, num)) - continue; - chain->static_len -= RULESIZE(rule); - if (cmd != 1) - ipfw_expire_dyn_rules(chain, rule, RESVD_SET); - rule->x_next = chain->reap; - chain->reap = rule; - } + case 0: /* delete rules numbered "rulenum" */ + if (num == 0) + rt.flags |= IPFW_RCFLAG_ALL; + else + rt.flags |= IPFW_RCFLAG_RANGE; + do_del = 1; break; - - /* - * In the next 3 cases the loop stops at (n_rules - 1) - * because the default rule is never eligible.. - */ - - case 2: /* move rules with given RULE number to new set */ - for (i = 0; i < chain->n_rules - 1; i++) { - rule = chain->map[i]; - if (rule->rulenum == num) - rule->set = new_set; - } + case 1: /* delete rules in set "rulenum" */ + rt.flags |= IPFW_RCFLAG_SET; + do_del = 1; break; - - case 3: /* move rules with given SET number to new set */ - for (i = 0; i < chain->n_rules - 1; i++) { - rule = chain->map[i]; - if (rule->set == num) - rule->set = new_set; - } + case 5: /* delete rules "rulenum" and set "new_set" */ + rt.flags |= IPFW_RCFLAG_RANGE | IPFW_RCFLAG_SET; + rt.set = new_set; + rt.new_set = 0; + do_del = 1; break; - - case 4: /* swap two sets */ - for (i = 0; i < chain->n_rules - 1; i++) { - rule = chain->map[i]; - if (rule->set == num) - rule->set = new_set; - else if (rule->set == new_set) - rule->set = num; - } + case 2: /* move rules "rulenum" to set "new_set" */ + rt.flags |= IPFW_RCFLAG_RANGE; break; + case 3: /* move rules from set "rulenum" to set "new_set" */ + IPFW_UH_WLOCK(chain); + swap_sets(chain, &rt, 1); + IPFW_UH_WUNLOCK(chain); + return (0); + case 4: /* swap sets "rulenum" and "new_set" */ + IPFW_UH_WLOCK(chain); + swap_sets(chain, &rt, 0); + IPFW_UH_WUNLOCK(chain); + return (0); + default: + return (ENOTSUP); } - rule = chain->reap; - chain->reap = NULL; - IPFW_UH_WUNLOCK(chain); - 
ipfw_reap_rules(rule); - if (map) - free(map, M_IPFW); - return error; -} + if (do_del != 0) { + if ((error = delete_range(chain, &rt, &ndel)) != 0) + return (error); -/* - * Clear counters for a specific rule. - * Normally run under IPFW_UH_RLOCK, but these are idempotent ops - * so we only care that rules do not disappear. - */ -static void -clear_counters(struct ip_fw *rule, int log_only) -{ - ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); + if (ndel == 0 && (cmd != 1 && num != 0)) + return (EINVAL); - if (log_only == 0) - IPFW_ZERO_RULE_COUNTER(rule); - if (l->o.opcode == O_LOG) - l->log_left = l->max_log; + return (0); + } + + return (move_range(chain, &rt)); } /** @@ -512,23 +1321,57 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) return (0); } + /* - * Check validity of the structure before insert. - * Rules are simple, so this mostly need to check rule sizes. + * Check rule head in FreeBSD11 format + * */ static int -check_ipfw_struct(struct ip_fw *rule, int size) +check_ipfw_rule1(struct ip_fw_rule *rule, int size, + struct rule_check_info *ci) { - int l, cmdlen = 0; - int have_action=0; - ipfw_insn *cmd; + int l; + + if (size < sizeof(*rule)) { + printf("ipfw: rule too short\n"); + return (EINVAL); + } + + /* Check for valid cmd_len */ + l = roundup2(RULESIZE(rule), sizeof(uint64_t)); + if (l != size) { + printf("ipfw: size mismatch (have %d want %d)\n", size, l); + return (EINVAL); + } + if (rule->act_ofs >= rule->cmd_len) { + printf("ipfw: bogus action offset (%u > %u)\n", + rule->act_ofs, rule->cmd_len - 1); + return (EINVAL); + } + + if (rule->rulenum > IPFW_DEFAULT_RULE - 1) + return (EINVAL); + + return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); +} + +/* + * Check rule head in FreeBSD8 format + * + */ +static int +check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, + struct rule_check_info *ci) +{ + int l; if (size < sizeof(*rule)) { printf("ipfw: rule too short\n"); return (EINVAL); } - /* first, check for 
valid size */ - l = RULESIZE(rule); + + /* Check for valid cmd_len */ + l = sizeof(*rule) + rule->cmd_len * 4 - 4; if (l != size) { printf("ipfw: size mismatch (have %d want %d)\n", size, l); return (EINVAL); @@ -538,12 +1381,26 @@ check_ipfw_struct(struct ip_fw *rule, int size) rule->act_ofs, rule->cmd_len - 1); return (EINVAL); } + + if (rule->rulenum > IPFW_DEFAULT_RULE - 1) + return (EINVAL); + + return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); +} + +static int +check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) +{ + int cmdlen, l; + int have_action; + + have_action = 0; + /* * Now go for the individual checks. Very simple ones, basically only * instruction sizes. */ - for (l = rule->cmd_len, cmd = rule->cmd ; - l > 0 ; l -= cmdlen, cmd += cmdlen) { + for (l = cmd_len; l > 0 ; l -= cmdlen, cmd += cmdlen) { cmdlen = F_LEN(cmd); if (cmdlen > l) { printf("ipfw: opcode %d size truncated\n", @@ -597,10 +1454,10 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_SETFIB: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; - if ((cmd->arg1 != IP_FW_TABLEARG) && - (cmd->arg1 >= rt_numfibs)) { + if ((cmd->arg1 != IP_FW_TARG) && + ((cmd->arg1 & 0x7FFFF) >= rt_numfibs)) { printf("ipfw: invalid fib number %d\n", - cmd->arg1); + cmd->arg1 & 0x7FFFF); return EINVAL; } goto check_action; @@ -662,6 +1519,18 @@ check_ipfw_struct(struct ip_fw *rule, int size) cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 && cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; + ci->table_opcodes++; + break; + case O_IP_FLOW_LOOKUP: + if (cmd->arg1 >= V_fw_tables_max) { + printf("ipfw: invalid table number %d\n", + cmd->arg1); + return (EINVAL); + } + if (cmdlen != F_INSN_SIZE(ipfw_insn) && + cmdlen != F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; + ci->table_opcodes++; break; case O_MACADDR2: if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) @@ -694,6 +1563,8 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_RECV: case O_XMIT: case O_VIA: + if 
(((ipfw_insn_if *)cmd)->name[0] == '\1') + ci->table_opcodes++; if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) goto bad_size; break; @@ -759,14 +1630,14 @@ check_action: printf("ipfw: opcode %d, multiple actions" " not allowed\n", cmd->opcode); - return EINVAL; + return (EINVAL); } have_action = 1; if (l != cmdlen) { printf("ipfw: opcode %d, action must be" " last opcode\n", cmd->opcode); - return EINVAL; + return (EINVAL); } break; #ifdef INET6 @@ -809,25 +1680,25 @@ check_action: case O_IP6_DST_MASK: case O_ICMP6TYPE: printf("ipfw: no IPv6 support in kernel\n"); - return EPROTONOSUPPORT; + return (EPROTONOSUPPORT); #endif default: printf("ipfw: opcode %d, unknown opcode\n", cmd->opcode); - return EINVAL; + return (EINVAL); } } } if (have_action == 0) { printf("ipfw: missing action\n"); - return EINVAL; + return (EINVAL); } return 0; bad_size: printf("ipfw: opcode %d size %d wrong\n", cmd->opcode, cmdlen); - return EINVAL; + return (EINVAL); } @@ -859,8 +1730,8 @@ struct ip_fw7 { ipfw_insn cmd[1]; /* storage for commands */ }; - int convert_rule_to_7(struct ip_fw *rule); -int convert_rule_to_8(struct ip_fw *rule); +static int convert_rule_to_7(struct ip_fw_rule0 *rule); +static int convert_rule_to_8(struct ip_fw_rule0 *rule); #ifndef RULESIZE7 #define RULESIZE7(rule) (sizeof(struct ip_fw7) + \ @@ -878,10 +1749,13 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) { char *bp = buf; char *ep = bp + space; - struct ip_fw *rule, *dst; - int l, i; + struct ip_fw *rule; + struct ip_fw_rule0 *dst; + int error, i, l, warnflag; time_t boot_seconds; + warnflag = 0; + boot_seconds = boottime.tv_sec; for (i = 0; i < chain->n_rules; i++) { rule = chain->map[i]; @@ -890,9 +1764,12 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) /* Convert rule to FreeBSd 7.2 format */ l = RULESIZE7(rule); if (bp + l + sizeof(uint32_t) <= ep) { - int error; bcopy(rule, bp, l + sizeof(uint32_t)); - error = convert_rule_to_7((struct ip_fw *) bp); + error = 
ipfw_rewrite_table_kidx(chain, + (struct ip_fw_rule0 *)bp); + if (error != 0) + return (0); + error = convert_rule_to_7((struct ip_fw_rule0 *) bp); if (error) return 0; /*XXX correct? */ /* @@ -910,76 +1787,793 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) continue; /* go to next rule */ } - /* normal mode, don't touch rules */ - l = RULESIZE(rule); + l = RULEUSIZE0(rule); if (bp + l > ep) { /* should not happen */ printf("overflow dumping static rules\n"); break; } - dst = (struct ip_fw *)bp; - bcopy(rule, dst, l); + dst = (struct ip_fw_rule0 *)bp; + export_rule0(rule, dst, l); + error = ipfw_rewrite_table_kidx(chain, dst); + /* * XXX HACK. Store the disable mask in the "next" * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? + * + * XXX: "ipfw set show" (ab)uses IP_FW_GET to read disabled mask + * so we need to fail _after_ saving at least one mask. */ bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable)); if (dst->timestamp) dst->timestamp += boot_seconds; bp += l; + + if (error != 0) { + if (error == 2) { + /* Non-fatal table rewrite error. */ + warnflag = 1; + continue; + } + printf("Stop on rule %d. Fail to convert table\n", + rule->rulenum); + break; + } } + if (warnflag != 0) + printf("ipfw: process %s is using legacy interfaces," + " consider rebuilding\n", ""); ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */ return (bp - (char *)buf); } +struct dump_args { + uint32_t b; /* start rule */ + uint32_t e; /* end rule */ + uint32_t rcount; /* number of rules */ + uint32_t rsize; /* rules size */ + uint32_t tcount; /* number of tables */ + int rcounters; /* counters */ +}; + +/* + * Dumps static rules with table TLVs in buffer @sd. + * + * Returns 0 on success. 
+ */ +static int +dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da, + uint32_t *bmask, struct sockopt_data *sd) +{ + int error; + int i, l; + uint32_t tcount; + ipfw_obj_ctlv *ctlv; + struct ip_fw *krule; + caddr_t dst; + + /* Dump table names first (if any) */ + if (da->tcount > 0) { + /* Header first */ + ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); + if (ctlv == NULL) + return (ENOMEM); + ctlv->head.type = IPFW_TLV_TBLNAME_LIST; + ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + + sizeof(*ctlv); + ctlv->count = da->tcount; + ctlv->objsize = sizeof(ipfw_obj_ntlv); + } + + i = 0; + tcount = da->tcount; + while (tcount > 0) { + if ((bmask[i / 32] & (1 << (i % 32))) == 0) { + i++; + continue; + } + + if ((error = ipfw_export_table_ntlv(chain, i, sd)) != 0) + return (error); + + i++; + tcount--; + } + + /* Dump rules */ + ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); + if (ctlv == NULL) + return (ENOMEM); + ctlv->head.type = IPFW_TLV_RULE_LIST; + ctlv->head.length = da->rsize + sizeof(*ctlv); + ctlv->count = da->rcount; + + for (i = da->b; i < da->e; i++) { + krule = chain->map[i]; + + l = RULEUSIZE1(krule) + sizeof(ipfw_obj_tlv); + if (da->rcounters != 0) + l += sizeof(struct ip_fw_bcounter); + dst = (caddr_t)ipfw_get_sopt_space(sd, l); + if (dst == NULL) + return (ENOMEM); + + export_rule1(krule, dst, l, da->rcounters); + } + + return (0); +} + +/* + * Dumps requested objects data + * Data layout (version 0)(current): + * Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags + * size = ipfw_cfg_lheader.size + * Reply: [ ipfw_rules_lheader + * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) + * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) + * ipfw_obj_tlv(IPFW_TLV_RULE_ENT) [ ip_fw_bcounter (optional) ip_fw_rule ] + * ] (optional) + * [ ipfw_obj_ctlv(IPFW_TLV_STATE_LIST) ipfw_obj_dyntlv x N ] (optional) + * ] + * * NOTE IPFW_TLV_STATE_LIST has the single valid field: objsize. 
+ * The rest (size, count) are set to zero and needs to be ignored. + * + * Returns 0 on success. + */ +static int +dump_config(struct ip_fw_chain *chain, struct sockopt_data *sd) +{ + ipfw_cfg_lheader *hdr; + struct ip_fw *rule; + uint32_t sz, rnum; + int error, i; + struct dump_args da; + uint32_t *bmask; + + hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); + if (hdr == NULL) + return (EINVAL); + + error = 0; + bmask = NULL; + /* Allocate needed state */ + if (hdr->flags & IPFW_CFG_GET_STATIC) + bmask = malloc(IPFW_TABLES_MAX / 8, M_TEMP, M_WAITOK | M_ZERO); + + IPFW_UH_RLOCK(chain); + + /* + * STAGE 1: Determine size/count for objects in range. + * Prepare used tables bitmask. + */ + sz = 0; + memset(&da, 0, sizeof(da)); + + da.b = 0; + da.e = chain->n_rules; + + if (hdr->end_rule != 0) { + /* Handle custom range */ + if ((rnum = hdr->start_rule) > IPFW_DEFAULT_RULE) + rnum = IPFW_DEFAULT_RULE; + da.b = ipfw_find_rule(chain, rnum, 0); + rnum = hdr->end_rule; + rnum = (rnum < IPFW_DEFAULT_RULE) ? 
rnum+1 : IPFW_DEFAULT_RULE; + da.e = ipfw_find_rule(chain, rnum, 0); + } + + if (hdr->flags & IPFW_CFG_GET_STATIC) { + for (i = da.b; i < da.e; i++) { + rule = chain->map[i]; + da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv); + da.rcount++; + da.tcount += ipfw_mark_table_kidx(chain, rule, bmask); + } + /* Add counters if requested */ + if (hdr->flags & IPFW_CFG_GET_COUNTERS) { + da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount; + da.rcounters = 1; + } + + if (da.tcount > 0) + sz += da.tcount * sizeof(ipfw_obj_ntlv) + + sizeof(ipfw_obj_ctlv); + sz += da.rsize + sizeof(ipfw_obj_ctlv); + } + + if (hdr->flags & IPFW_CFG_GET_STATES) + sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) + + sizeof(ipfw_obj_ctlv); + + /* Fill header anyway */ + hdr->size = sz; + hdr->set_mask = ~V_set_disable; + + if (sd->valsize < sz) { + IPFW_UH_RUNLOCK(chain); + return (ENOMEM); + } + + /* STAGE2: Store actual data */ + if (hdr->flags & IPFW_CFG_GET_STATIC) { + error = dump_static_rules(chain, &da, bmask, sd); + if (error != 0) { + IPFW_UH_RUNLOCK(chain); + return (error); + } + } + + if (hdr->flags & IPFW_CFG_GET_STATES) + error = ipfw_dump_states(chain, sd); + + IPFW_UH_RUNLOCK(chain); + + if (bmask != NULL) + free(bmask, M_TEMP); + + return (error); +} + #define IP_FW3_OPLENGTH(x) ((x)->sopt_valsize - sizeof(ip_fw3_opheader)) -/** - * {set|get}sockopt parser. +#define IP_FW3_WRITEBUF 4096 /* small page-size write buffer */ +#define IP_FW3_READBUF 16 * 1024 * 1024 /* handle large rulesets */ + + +static int +check_object_name(ipfw_obj_ntlv *ntlv) +{ + int error; + + switch (ntlv->head.type) { + case IPFW_TLV_TBL_NAME: + error = ipfw_check_table_name(ntlv->name); + break; + default: + error = ENOTSUP; + } + + return (0); +} + +/* + * Adds one or more rules to ipfw @chain. 
+ * Data layout (version 0)(current): + * Request: + * [ + * ip_fw3_opheader + * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1) + * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] (*2) (*3) + * ] + * Reply: + * [ + * ip_fw3_opheader + * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) + * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] + * ] + * + * Rules in reply are modified to store their actual ruleset number. + * + * (*1) TLVs inside IPFW_TLV_TBL_LIST need to be sorted ascending + * according to their idx field and there has to be no duplicates. + * (*2) Numbered rules inside IPFW_TLV_RULE_LIST need to be sorted ascending. + * (*3) Each ip_fw structure needs to be aligned to u64 boundary. + * + * Returns 0 on success. */ -int -ipfw_ctl(struct sockopt *sopt) +static int +add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_ctlv *ctlv, *rtlv, *tstate; + ipfw_obj_ntlv *ntlv; + int clen, error, idx; + uint32_t count, read; + struct ip_fw_rule *r; + struct rule_check_info rci, *ci, *cbuf; + int i, rsize; + + if (sd->valsize > IP_FW3_READBUF) + return (EINVAL); + + op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize); + ctlv = (ipfw_obj_ctlv *)(op3 + 1); + + read = sizeof(ip_fw3_opheader); + rtlv = NULL; + tstate = NULL; + cbuf = NULL; + memset(&rci, 0, sizeof(struct rule_check_info)); + + if (read + sizeof(*ctlv) > sd->valsize) + return (EINVAL); + + if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { + clen = ctlv->head.length; + /* Check size and alignment */ + if (clen > sd->valsize || clen < sizeof(*ctlv)) + return (EINVAL); + if ((clen % sizeof(uint64_t)) != 0) + return (EINVAL); + + /* + * Some table names or other named objects. + * Check for validity. + */ + count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv); + if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv)) + return (EINVAL); + + /* + * Check each TLV. 
+ * Ensure TLVs are sorted ascending and + * there are no duplicates. + */ + idx = -1; + ntlv = (ipfw_obj_ntlv *)(ctlv + 1); + while (count > 0) { + if (ntlv->head.length != sizeof(ipfw_obj_ntlv)) + return (EINVAL); + + error = check_object_name(ntlv); + if (error != 0) + return (error); + + if (ntlv->idx <= idx) + return (EINVAL); + + idx = ntlv->idx; + count--; + ntlv++; + } + + tstate = ctlv; + read += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); + } + + if (read + sizeof(*ctlv) > sd->valsize) + return (EINVAL); + + if (ctlv->head.type == IPFW_TLV_RULE_LIST) { + clen = ctlv->head.length; + if (clen + read > sd->valsize || clen < sizeof(*ctlv)) + return (EINVAL); + if ((clen % sizeof(uint64_t)) != 0) + return (EINVAL); + + /* + * TODO: Permit adding multiple rules at once + */ + if (ctlv->count != 1) + return (ENOTSUP); + + clen -= sizeof(*ctlv); + + if (ctlv->count > clen / sizeof(struct ip_fw_rule)) + return (EINVAL); + + /* Allocate state for each rule or use stack */ + if (ctlv->count == 1) { + memset(&rci, 0, sizeof(struct rule_check_info)); + cbuf = &rci; + } else + cbuf = malloc(ctlv->count * sizeof(*ci), M_TEMP, + M_WAITOK | M_ZERO); + ci = cbuf; + + /* + * Check each rule for validness. 
+ * Ensure numbered rules are sorted ascending + * and properly aligned + */ + idx = 0; + r = (struct ip_fw_rule *)(ctlv + 1); + count = 0; + error = 0; + while (clen > 0) { + rsize = roundup2(RULESIZE(r), sizeof(uint64_t)); + if (rsize > clen || ctlv->count <= count) { + error = EINVAL; + break; + } + + ci->ctlv = tstate; + error = check_ipfw_rule1(r, rsize, ci); + if (error != 0) + break; + + /* Check sorting */ + if (r->rulenum != 0 && r->rulenum < idx) { + printf("rulenum %d idx %d\n", r->rulenum, idx); + error = EINVAL; + break; + } + idx = r->rulenum; + + ci->urule = (caddr_t)r; + + rsize = roundup2(rsize, sizeof(uint64_t)); + clen -= rsize; + r = (struct ip_fw_rule *)((caddr_t)r + rsize); + count++; + ci++; + } + + if (ctlv->count != count || error != 0) { + if (cbuf != &rci) + free(cbuf, M_TEMP); + return (EINVAL); + } + + rtlv = ctlv; + read += ctlv->head.length; + ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); + } + + if (read != sd->valsize || rtlv == NULL || rtlv->count == 0) { + if (cbuf != NULL && cbuf != &rci) + free(cbuf, M_TEMP); + return (EINVAL); + } + + /* + * Passed rules seems to be valid. + * Allocate storage and try to add them to chain. + */ + for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) { + clen = RULEKSIZE1((struct ip_fw_rule *)ci->urule); + ci->krule = ipfw_alloc_rule(chain, clen); + import_rule1(ci); + } + + if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) { + /* Free allocate krules */ + for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) + free(ci->krule, M_IPFW); + } + + if (cbuf != NULL && cbuf != &rci) + free(cbuf, M_TEMP); + + return (error); +} + +/* + * Writes data accumulated in @sd to sockopt buffer. + * Zeroes internal @sd buffer. 
+ */
+static int
+ipfw_flush_sopt_data(struct sockopt_data *sd)
 {
-#define RULE_MAXSIZE (256*sizeof(u_int32_t))
 	int error;
-	size_t size, len, valsize;
-	struct ip_fw *buf, *rule;
+	size_t sz;
+
+	if ((sz = sd->koff) == 0)	/* nothing buffered since the last flush */
+		return (0);
+
+	if (sd->sopt->sopt_dir == SOPT_GET) {	/* data is copied out for GET requests only */
+		error = sooptcopyout(sd->sopt, sd->kbuf, sz);
+		if (error != 0)
+			return (error);
+	}
+
+	memset(sd->kbuf, 0, sd->ksize);	/* kernel buffer is reused for the next window */
+	sd->ktotal += sd->koff;	/* running total of bytes flushed so far */
+	sd->koff = 0;
+	if (sd->ktotal + sd->ksize < sd->valsize)	/* room for one more full buffer? */
+		sd->kavail = sd->ksize;
+	else
+		sd->kavail = sd->valsize - sd->ktotal;	/* only the tail of valsize remains */
+
+	/*
+	 * Update sopt buffer: limit its size to the new window and
+	 * position it past the bytes that were already flushed.
+	 */
+	sd->sopt->sopt_valsize = sd->kavail;
+	sd->sopt->sopt_val = sd->sopt_val + sd->ktotal;
+
+	return (0);
+}
+
+/*
+ * Ensures that @sd buffer has contiguous @needed number of
+ * bytes.
+ *
+ * If fewer than @needed bytes are available, the buffer is flushed
+ * once via ipfw_flush_sopt_data() and the check is repeated.
+ * On success the returned region starts at the current offset and
+ * @sd->koff / @sd->kavail are adjusted by @needed.
+ *
+ * Returns pointer to requested space or NULL (not enough space
+ * even after the flush, or the flush itself failed).
+ */
+caddr_t
+ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed)
+{
+	int error;
+	caddr_t addr;
+
+	if (sd->kavail < needed) {
+		/*
+		 * Flush data and try another time.
+		 */
+		error = ipfw_flush_sopt_data(sd);
+
+		if (sd->kavail < needed || error != 0)
+			return (NULL);
+	}
+
+	addr = sd->kbuf + sd->koff;	/* first unused byte of the kernel buffer */
+	sd->koff += needed;
+	sd->kavail -= needed;
+	return (addr);
+}
+
+/*
+ * Requests @needed contiguous bytes from @sd buffer.
+ * Function is used to notify subsystem that we are
+ * interested in first @needed bytes (request header)
+ * and the rest of the buffer can be safely zeroed.
+ *
+ * The space is obtained via ipfw_get_sopt_space(); the @sd->kavail
+ * bytes following it are then cleared.
+ *
+ * Returns pointer to requested space or NULL.
+ */
+caddr_t
+ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed)
+{
+	caddr_t addr;
+
+	if ((addr = ipfw_get_sopt_space(sd, needed)) == NULL)
+		return (NULL);
+
+	if (sd->kavail > 0)	/* wipe everything behind the header */
+		memset(sd->kbuf + sd->koff, 0, sd->kavail);
+
+	return (addr);
+}
+
+/*
+ * New sockopt handler. 
+ */
+int
+ipfw_ctl3(struct sockopt *sopt)
+{
+	int error, ctype;
+	size_t bsize_max, size, valsize;
 	struct ip_fw_chain *chain;
-	u_int32_t rulenum[2];
 	uint32_t opt;
-	char xbuf[128];
+	char xbuf[256];
+	struct sockopt_data sdata;
 	ip_fw3_opheader *op3 = NULL;
 
 	error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
-	if (error)
+	if (error != 0)
+		return (error);
+
+	/* Everything except IP_FW3 is served by the legacy handler */
+	if (sopt->sopt_name != IP_FW3)
+		return (ipfw_ctl(sopt));
+
+	chain = &V_layer3_chain;
+	error = 0;
+
+	/* Save original valsize before it is altered via sooptcopyin() */
+	valsize = sopt->sopt_valsize;
+	memset(&sdata, 0, sizeof(sdata));
+	/* Read op3 header first to determine actual operation */
+	op3 = (ip_fw3_opheader *)xbuf;
+	error = sooptcopyin(sopt, op3, sizeof(*op3), sizeof(*op3));
+	if (error != 0)
 		return (error);
+	opt = op3->opcode;
+	sopt->sopt_valsize = valsize;
+
+	/*
+	 * Determine opcode type/buffer size:
+	 * use on-stack xbuf for short request,
+	 * allocate sliding-window buf for data export or
+	 * contiguous buffer for special ops.
+	 */
+	ctype = (sopt->sopt_dir == SOPT_GET) ? SOPT_GET : SOPT_SET;
+	switch (opt) {
+	case IP_FW_XADD:
+	case IP_FW_XDEL:
+	case IP_FW_TABLE_XADD:
+	case IP_FW_TABLE_XDEL:
+		/* These modify state regardless of the sockopt direction */
+		ctype = SOPT_SET;
+		bsize_max = IP_FW3_READBUF;
+		break;
+	default:
+		bsize_max = IP_FW3_WRITEBUF;
+	}
 
 	/*
 	 * Disallow modifications in really-really secure mode, but still allow
 	 * the logging counters to be reset.
 	 */
-	if (sopt->sopt_name == IP_FW_ADD ||
-	    (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
+	if (ctype == SOPT_SET && opt != IP_FW_XRESETLOG) {
 		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
-		if (error)
+		if (error != 0)
 			return (error);
 	}
 
+	/*
+	 * Fill in sockopt_data structure that may be useful for
+	 * IP_FW3 get requests.
+	 */
+
+	if (valsize <= sizeof(xbuf)) {
+		/* Short request: the on-stack buffer is enough */
+		sdata.kbuf = xbuf;
+		sdata.ksize = sizeof(xbuf);
+		sdata.kavail = valsize;
+	} else {
+		/* Heap buffer of min(valsize, bsize_max) bytes */
+		if (valsize < bsize_max)
+			size = valsize;
+		else
+			size = bsize_max;
+
+		sdata.kbuf = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
+		sdata.ksize = size;
+		sdata.kavail = size;
+	}
+
+	sdata.sopt = sopt;
+	sdata.sopt_val = sopt->sopt_val;
+	sdata.valsize = valsize;
+
+	/*
+	 * Copy either all request (if valsize < bsize_max)
+	 * or first bsize_max bytes to guarantee most consumers
+	 * that all necessary data has been copied).
+	 * Anyway, copy not less than sizeof(ip_fw3_opheader).
+	 */
+	if ((error = sooptcopyin(sopt, sdata.kbuf, sdata.ksize,
+	    sizeof(ip_fw3_opheader))) != 0) {
+		/*
+		 * The buffer may have been allocated above;
+		 * release it so the error path does not leak.
+		 */
+		if (sdata.kbuf != xbuf)
+			free(sdata.kbuf, M_TEMP);
+		return (error);
+	}
+	op3 = (ip_fw3_opheader *)sdata.kbuf;
+	opt = op3->opcode;
+
+	switch (opt) {
+	case IP_FW_XGET:
+		error = dump_config(chain, &sdata);
+		break;
+
+	case IP_FW_XADD:
+		error = add_rules(chain, op3, &sdata);
+		break;
+
+	case IP_FW_XDEL:
+		error = del_rules(chain, op3, &sdata);
+		break;
+
+	case IP_FW_XZERO:
+	case IP_FW_XRESETLOG:
+		error = clear_rules(chain, op3, &sdata);
+		break;
+
+	case IP_FW_XMOVE:
+		error = move_rules(chain, op3, &sdata);
+		break;
+
+	case IP_FW_SET_SWAP:
+	case IP_FW_SET_MOVE:
+	case IP_FW_SET_ENABLE:
+		error = manage_sets(chain, op3, &sdata);
+		break;
+
+	case IP_FW_XIFLIST:
+		error = ipfw_list_ifaces(chain, &sdata);
+		break;
+
+	/*--- TABLE opcodes ---*/
+	case IP_FW_TABLE_XCREATE:
+		error = ipfw_create_table(chain, op3, &sdata);
+		break;
+
+	case IP_FW_TABLE_XDESTROY:
+	case IP_FW_TABLE_XFLUSH:
+		error = ipfw_flush_table(chain, op3, &sdata);
+		break;
+
+	case IP_FW_TABLE_XMODIFY:
+		error = ipfw_modify_table(chain, op3, &sdata);
+		break;
+
+	case IP_FW_TABLE_XINFO:
+		error = ipfw_describe_table(chain, &sdata);
+		break;
+
+	case IP_FW_TABLES_XLIST:
+		error = ipfw_list_tables(chain, &sdata);
+		break;
+
+	case IP_FW_TABLE_XLIST:
+		error = ipfw_dump_table(chain, op3, &sdata);
+		break;
+
+	case IP_FW_TABLE_XADD:
+	case IP_FW_TABLE_XDEL:
+		error = ipfw_manage_table_ent(chain, op3, &sdata);
+		break;
+
+	case IP_FW_TABLE_XFIND:
+		error = ipfw_find_table_entry(chain, op3, &sdata);
+		break;
+
+	case IP_FW_TABLE_XSWAP:
+		error = ipfw_swap_table(chain, op3, &sdata);
+		break;
+
+	case IP_FW_TABLES_ALIST:
+		error = ipfw_list_table_algo(chain, &sdata);
+		break;
+
+	case IP_FW_TABLE_XGETSIZE:
+		{
+			uint32_t *tbl;
+			struct tid_info ti;
+
+			if (IP_FW3_OPLENGTH(sopt) < sizeof(uint32_t)) {
+				error = EINVAL;
+				break;
+			}
+
+			/* Table index follows the op3 header and is overwritten by the count call */
+			tbl = (uint32_t *)(op3 + 1);
+
+			memset(&ti, 0, sizeof(ti));
+			ti.uidx = *tbl;
+			IPFW_UH_RLOCK(chain);
+			error = ipfw_count_xtable(chain, &ti, tbl);
+			IPFW_UH_RUNLOCK(chain);
+			if (error)
+				break;
+			error = sooptcopyout(sopt, op3, sopt->sopt_valsize);
+		}
+		break;
+
+	default:
+		printf("ipfw: ipfw_ctl3 invalid option %d\n", opt);
+		error = EINVAL;
+	}
+
+	/* Flush state and free buffers; keep the handler's error if there was one */
+	if (error == 0)
+		error = ipfw_flush_sopt_data(&sdata);
+	else
+		ipfw_flush_sopt_data(&sdata);
+
+	/* Restore original pointer and set number of bytes written */
+	sopt->sopt_val = sdata.sopt_val;
+	sopt->sopt_valsize = sdata.ktotal;
+	if (sdata.kbuf != xbuf)
+		free(sdata.kbuf, M_TEMP);
+
+	return (error);
+}
+
+/**
+ * {set|get}sockopt parser.
+ */
+int
+ipfw_ctl(struct sockopt *sopt)
+{
+#define RULE_MAXSIZE (256*sizeof(u_int32_t))
+	int error;
+	size_t size, valsize;
+	struct ip_fw *buf;
+	struct ip_fw_rule0 *rule;
+	struct ip_fw_chain *chain;
+	u_int32_t rulenum[2];
+	uint32_t opt;
+	struct rule_check_info ci;
+
 	chain = &V_layer3_chain;
 	error = 0;
 
 	/* Save original valsize before it is altered via sooptcopyin() */
 	valsize = sopt->sopt_valsize;
-	if ((opt = sopt->sopt_name) == IP_FW3) {
-		/*
-		 * Copy not less than sizeof(ip_fw3_opheader).
-		 * We hope any IP_FW3 command will fit into 128-byte buffer. 
- */ - if ((error = sooptcopyin(sopt, xbuf, sizeof(xbuf), - sizeof(ip_fw3_opheader))) != 0) + opt = sopt->sopt_name; + + /* + * Disallow modifications in really-really secure mode, but still allow + * the logging counters to be reset. + */ + if (opt == IP_FW_ADD || + (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) { + error = securelevel_ge(sopt->sopt_td->td_ucred, 3); + if (error != 0) return (error); - op3 = (ip_fw3_opheader *)xbuf; - opt = op3->opcode; } switch (opt) { @@ -1002,7 +2596,7 @@ ipfw_ctl(struct sockopt *sopt) size += ipfw_dyn_len(); if (size >= sopt->sopt_valsize) break; - buf = malloc(size, M_TEMP, M_WAITOK); + buf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); IPFW_UH_RLOCK(chain); /* check again how much space we need */ want = chain->static_len + ipfw_dyn_len(); @@ -1027,6 +2621,8 @@ ipfw_ctl(struct sockopt *sopt) error = sooptcopyin(sopt, rule, RULE_MAXSIZE, sizeof(struct ip_fw7) ); + memset(&ci, 0, sizeof(struct rule_check_info)); + /* * If the size of commands equals RULESIZE7 then we assume * a FreeBSD7.2 binary is talking to us (set is7=1). @@ -1036,24 +2632,27 @@ ipfw_ctl(struct sockopt *sopt) * the first ipfw command is 'ipfw [pipe] list') * the ipfw binary may crash or loop infinitly... 
*/ - if (sopt->sopt_valsize == RULESIZE7(rule)) { + size = sopt->sopt_valsize; + if (size == RULESIZE7(rule)) { is7 = 1; error = convert_rule_to_8(rule); if (error) { free(rule, M_TEMP); return error; } - if (error == 0) - error = check_ipfw_struct(rule, RULESIZE(rule)); - } else { + size = RULESIZE(rule); + } else is7 = 0; if (error == 0) - error = check_ipfw_struct(rule, sopt->sopt_valsize); - } + error = check_ipfw_rule0(rule, size, &ci); if (error == 0) { - /* locking is done within ipfw_add_rule() */ - error = ipfw_add_rule(chain, rule); - size = RULESIZE(rule); + /* locking is done within add_rule() */ + struct ip_fw *krule; + krule = ipfw_alloc_rule(chain, RULEKSIZE0(rule)); + ci.urule = (caddr_t)rule; + ci.krule = krule; + import_rule0(&ci); + error = commit_rules(chain, &ci, 1); if (!error && sopt->sopt_dir == SOPT_GET) { if (is7) { error = convert_rule_to_7(rule); @@ -1113,82 +2712,62 @@ ipfw_ctl(struct sockopt *sopt) sopt->sopt_name == IP_FW_RESETLOG); break; - /*--- TABLE manipulations are protected by the IPFW_LOCK ---*/ + /*--- TABLE opcodes ---*/ case IP_FW_TABLE_ADD: - { - ipfw_table_entry ent; - - error = sooptcopyin(sopt, &ent, - sizeof(ent), sizeof(ent)); - if (error) - break; - error = ipfw_add_table_entry(chain, ent.tbl, - &ent.addr, sizeof(ent.addr), ent.masklen, - IPFW_TABLE_CIDR, ent.value); - } - break; - case IP_FW_TABLE_DEL: { ipfw_table_entry ent; + struct tentry_info tei; + struct tid_info ti; error = sooptcopyin(sopt, &ent, sizeof(ent), sizeof(ent)); if (error) break; - error = ipfw_del_table_entry(chain, ent.tbl, - &ent.addr, sizeof(ent.addr), ent.masklen, IPFW_TABLE_CIDR); - } - break; - - case IP_FW_TABLE_XADD: /* IP_FW3 */ - case IP_FW_TABLE_XDEL: /* IP_FW3 */ - { - ipfw_table_xentry *xent = (ipfw_table_xentry *)(op3 + 1); - - /* Check minimum header size */ - if (IP_FW3_OPLENGTH(sopt) < offsetof(ipfw_table_xentry, k)) { - error = EINVAL; - break; - } - /* Check if len field is valid */ - if (xent->len > sizeof(ipfw_table_xentry)) 
{ - error = EINVAL; - break; - } - - len = xent->len - offsetof(ipfw_table_xentry, k); - - error = (opt == IP_FW_TABLE_XADD) ? - ipfw_add_table_entry(chain, xent->tbl, &xent->k, - len, xent->masklen, xent->type, xent->value) : - ipfw_del_table_entry(chain, xent->tbl, &xent->k, - len, xent->masklen, xent->type); + memset(&tei, 0, sizeof(tei)); + tei.paddr = &ent.addr; + tei.subtype = AF_INET; + tei.masklen = ent.masklen; + tei.value = ent.value; + memset(&ti, 0, sizeof(ti)); + ti.uidx = ent.tbl; + ti.type = IPFW_TABLE_CIDR; + + error = (opt == IP_FW_TABLE_ADD) ? + add_table_entry(chain, &ti, &tei, 0, 1) : + del_table_entry(chain, &ti, &tei, 0, 1); } break; + case IP_FW_TABLE_FLUSH: { u_int16_t tbl; + struct tid_info ti; error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)); if (error) break; - error = ipfw_flush_table(chain, tbl); + memset(&ti, 0, sizeof(ti)); + ti.uidx = tbl; + error = flush_table(chain, &ti); } break; case IP_FW_TABLE_GETSIZE: { u_int32_t tbl, cnt; + struct tid_info ti; if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)))) break; + memset(&ti, 0, sizeof(ti)); + ti.uidx = tbl; IPFW_RLOCK(chain); - error = ipfw_count_table(chain, tbl, &cnt); + error = ipfw_count_table(chain, &ti, &cnt); IPFW_RUNLOCK(chain); if (error) break; @@ -1199,6 +2778,7 @@ ipfw_ctl(struct sockopt *sopt) case IP_FW_TABLE_LIST: { ipfw_table *tbl; + struct tid_info ti; if (sopt->sopt_valsize < sizeof(*tbl)) { error = EINVAL; @@ -1213,8 +2793,10 @@ ipfw_ctl(struct sockopt *sopt) } tbl->size = (size - sizeof(*tbl)) / sizeof(ipfw_table_entry); + memset(&ti, 0, sizeof(ti)); + ti.uidx = tbl->tbl; IPFW_RLOCK(chain); - error = ipfw_dump_table(chain, tbl); + error = ipfw_dump_table_legacy(chain, &ti, tbl); IPFW_RUNLOCK(chain); if (error) { free(tbl, M_TEMP); @@ -1225,62 +2807,6 @@ ipfw_ctl(struct sockopt *sopt) } break; - case IP_FW_TABLE_XGETSIZE: /* IP_FW3 */ - { - uint32_t *tbl; - - if (IP_FW3_OPLENGTH(sopt) < sizeof(uint32_t)) { - error = EINVAL; - break; - } - - tbl 
= (uint32_t *)(op3 + 1); - - IPFW_RLOCK(chain); - error = ipfw_count_xtable(chain, *tbl, tbl); - IPFW_RUNLOCK(chain); - if (error) - break; - error = sooptcopyout(sopt, op3, sopt->sopt_valsize); - } - break; - - case IP_FW_TABLE_XLIST: /* IP_FW3 */ - { - ipfw_xtable *tbl; - - if ((size = valsize) < sizeof(ipfw_xtable)) { - error = EINVAL; - break; - } - - tbl = malloc(size, M_TEMP, M_ZERO | M_WAITOK); - memcpy(tbl, op3, sizeof(ipfw_xtable)); - - /* Get maximum number of entries we can store */ - tbl->size = (size - sizeof(ipfw_xtable)) / - sizeof(ipfw_table_xentry); - IPFW_RLOCK(chain); - error = ipfw_dump_xtable(chain, tbl); - IPFW_RUNLOCK(chain); - if (error) { - free(tbl, M_TEMP); - break; - } - - /* Revert size field back to bytes */ - tbl->size = tbl->size * sizeof(ipfw_table_xentry) + - sizeof(ipfw_table); - /* - * Since we call sooptcopyin() with small buffer, sopt_valsize is - * decreased to reflect supplied buffer size. Set it back to original value - */ - sopt->sopt_valsize = valsize; - error = sooptcopyout(sopt, tbl, size); - free(tbl, M_TEMP); - } - break; - /*--- NAT operations are protected by the IPFW_LOCK ---*/ case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) @@ -1330,18 +2856,16 @@ ipfw_ctl(struct sockopt *sopt) return (error); #undef RULE_MAXSIZE } - - #define RULE_MAXSIZE (256*sizeof(u_int32_t)) /* Functions to convert rules 7.2 <==> 8.0 */ -int -convert_rule_to_7(struct ip_fw *rule) +static int +convert_rule_to_7(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *)rule; /* copy of original rule, version 8 */ - struct ip_fw *tmp; + struct ip_fw_rule0 *tmp; /* Used to copy commands */ ipfw_insn *ccmd, *dst; @@ -1354,13 +2878,12 @@ convert_rule_to_7(struct ip_fw *rule) bcopy(rule, tmp, RULE_MAXSIZE); /* Copy fields */ - rule7->_pad = tmp->_pad; + //rule7->_pad = tmp->_pad; rule7->set = tmp->set; rule7->rulenum = tmp->rulenum; rule7->cmd_len = tmp->cmd_len; rule7->act_ofs = tmp->act_ofs; rule7->next_rule = 
(struct ip_fw7 *)tmp->next_rule; - rule7->next = (struct ip_fw7 *)tmp->x_next; rule7->cmd_len = tmp->cmd_len; rule7->pcnt = tmp->pcnt; rule7->bcnt = tmp->bcnt; @@ -1390,8 +2913,8 @@ convert_rule_to_7(struct ip_fw *rule) return 0; } -int -convert_rule_to_8(struct ip_fw *rule) +static int +convert_rule_to_8(struct ip_fw_rule0 *rule) { /* Used to modify original rule */ struct ip_fw7 *rule7 = (struct ip_fw7 *) rule; @@ -1433,7 +2956,6 @@ convert_rule_to_8(struct ip_fw *rule) rule->cmd_len = tmp->cmd_len; rule->act_ofs = tmp->act_ofs; rule->next_rule = (struct ip_fw *)tmp->next_rule; - rule->x_next = (struct ip_fw *)tmp->next; rule->cmd_len = tmp->cmd_len; rule->id = 0; /* XXX see if is ok = 0 */ rule->pcnt = tmp->pcnt; @@ -1444,4 +2966,302 @@ convert_rule_to_8(struct ip_fw *rule) return 0; } +/* + * Named object api + * + */ + +/* + * Allocate new bitmask which can be used to enlarge/shrink + * named instance index. + */ +void +ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks) +{ + size_t size; + int max_blocks; + u_long *idx_mask; + + items = roundup2(items, BLOCK_ITEMS); /* Align to block size */ + max_blocks = items / BLOCK_ITEMS; + size = items / 8; + idx_mask = malloc(size * IPFW_MAX_SETS, M_IPFW, M_WAITOK); + /* Mark all as free */ + memset(idx_mask, 0xFF, size * IPFW_MAX_SETS); + *idx_mask &= ~(u_long)1; /* Skip index 0 */ + + *idx = idx_mask; + *pblocks = max_blocks; +} + +/* + * Copy current bitmask index to new one. + */ +void +ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks) +{ + int old_blocks, new_blocks; + u_long *old_idx, *new_idx; + int i; + + old_idx = ni->idx_mask; + old_blocks = ni->max_blocks; + new_idx = *idx; + new_blocks = *blocks; + + for (i = 0; i < IPFW_MAX_SETS; i++) { + memcpy(&new_idx[new_blocks * i], &old_idx[old_blocks * i], + old_blocks * sizeof(u_long)); + } +} + +/* + * Swaps current @ni index with new one. 
+ */ +void +ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks) +{ + int old_blocks; + u_long *old_idx; + + old_idx = ni->idx_mask; + old_blocks = ni->max_blocks; + + ni->idx_mask = *idx; + ni->max_blocks = *blocks; + + /* Save old values */ + *idx = old_idx; + *blocks = old_blocks; +} + +void +ipfw_objhash_bitmap_free(void *idx, int blocks) +{ + + free(idx, M_IPFW); +} + +/* + * Creates named hash instance. + * Must be called without holding any locks. + * Return pointer to new instance. + */ +struct namedobj_instance * +ipfw_objhash_create(uint32_t items) +{ + struct namedobj_instance *ni; + int i; + size_t size; + + size = sizeof(struct namedobj_instance) + + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE + + sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE; + + ni = malloc(size, M_IPFW, M_WAITOK | M_ZERO); + ni->nn_size = NAMEDOBJ_HASH_SIZE; + ni->nv_size = NAMEDOBJ_HASH_SIZE; + + ni->names = (struct namedobjects_head *)(ni +1); + ni->values = &ni->names[ni->nn_size]; + + for (i = 0; i < ni->nn_size; i++) + TAILQ_INIT(&ni->names[i]); + + for (i = 0; i < ni->nv_size; i++) + TAILQ_INIT(&ni->values[i]); + + /* Allocate bitmask separately due to possible resize */ + ipfw_objhash_bitmap_alloc(items, (void*)&ni->idx_mask, &ni->max_blocks); + + return (ni); +} + +void +ipfw_objhash_destroy(struct namedobj_instance *ni) +{ + + free(ni->idx_mask, M_IPFW); + free(ni, M_IPFW); +} + +static uint32_t +objhash_hash_name(struct namedobj_instance *ni, uint32_t set, char *name) +{ + uint32_t v; + + v = fnv_32_str(name, FNV1_32_INIT); + + return (v % ni->nn_size); +} + +static uint32_t +objhash_hash_val(struct namedobj_instance *ni, uint32_t val) +{ + uint32_t v; + + v = val % (ni->nv_size - 1); + + return (v); +} + +struct named_object * +ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name) +{ + struct named_object *no; + uint32_t hash; + + hash = objhash_hash_name(ni, set, name); + + TAILQ_FOREACH(no, 
&ni->names[hash], nn_next) { + if ((strcmp(no->name, name) == 0) && (no->set == set)) + return (no); + } + + return (NULL); +} + +struct named_object * +ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx) +{ + struct named_object *no; + uint32_t hash; + + hash = objhash_hash_val(ni, kidx); + + TAILQ_FOREACH(no, &ni->values[hash], nv_next) { + if (no->kidx == kidx) + return (no); + } + + return (NULL); +} + +int +ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, + struct named_object *b) +{ + + if ((strcmp(a->name, b->name) == 0) && a->set == b->set) + return (1); + + return (0); +} + +void +ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no) +{ + uint32_t hash; + + hash = objhash_hash_name(ni, no->set, no->name); + TAILQ_INSERT_HEAD(&ni->names[hash], no, nn_next); + + hash = objhash_hash_val(ni, no->kidx); + TAILQ_INSERT_HEAD(&ni->values[hash], no, nv_next); + + ni->count++; +} + +void +ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no) +{ + uint32_t hash; + + hash = objhash_hash_name(ni, no->set, no->name); + TAILQ_REMOVE(&ni->names[hash], no, nn_next); + + hash = objhash_hash_val(ni, no->kidx); + TAILQ_REMOVE(&ni->values[hash], no, nv_next); + + ni->count--; +} + +uint32_t +ipfw_objhash_count(struct namedobj_instance *ni) +{ + + return (ni->count); +} + +/* + * Runs @func for each found named object. + * It is safe to delete objects from callback + */ +void +ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg) +{ + struct named_object *no, *no_tmp; + int i; + + for (i = 0; i < ni->nn_size; i++) { + TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) + f(ni, no, arg); + } +} + +/* + * Removes index from given set. + * Returns 0 on success. 
+ */
+int
+ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx)
+{
+	u_long *mask;
+	int i, v;
+
+	i = idx / BLOCK_ITEMS;	/* bitmask word holding @idx */
+	v = idx % BLOCK_ITEMS;	/* bit position inside that word */
+
+	if (i >= ni->max_blocks)	/* index is beyond the current bitmask */
+		return (1);
+
+	mask = &ni->idx_mask[i];
+
+	if ((*mask & ((u_long)1 << v)) != 0)	/* bit already set: index is free already */
+		return (1);
+
+	/* Mark as free */
+	*mask |= (u_long)1 << v;
+
+	/* Update free offset so that the next allocation scan starts no later than this word */
+	if (ni->free_off[0] > i)
+		ni->free_off[0] = i;
+
+	return (0);
+}
+
+/*
+ * Allocate new index in given set and stores it in @pidx.
+ * The scan starts at word free_off[0] and takes the lowest set
+ * (free) bit of the first word that has one.
+ * NOTE(review): only free_off[0] and the start of idx_mask are used
+ * here and no set number is passed in - confirm whether per-set
+ * indexing is intentionally unused.
+ *
+ * Returns 0 on success, 1 if no free index was found.
+ */
+int
+ipfw_objhash_alloc_idx(void *n, uint16_t *pidx)
+{
+	struct namedobj_instance *ni;
+	u_long *mask;
+	int i, off, v;
+
+	ni = (struct namedobj_instance *)n;
+
+	off = ni->free_off[0];
+	mask = &ni->idx_mask[off];
+
+	for (i = off; i < ni->max_blocks; i++, mask++) {
+		if ((v = ffsl(*mask)) == 0)	/* no free bit in this word */
+			continue;
+
+		/* Mark as busy; ffsl() result is 1-based, hence v - 1 */
+		*mask &= ~ ((u_long)1 << (v - 1));
+
+		ni->free_off[0] = i;	/* resume later scans from this word */
+
+		v = BLOCK_ITEMS * i + v - 1;	/* convert (word, bit) back to an index */
+
+		*pidx = v;
+		return (0);
+	}
+
+	return (1);
+}
+
 /* end of file */
diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c
index 760a10c..1573c3f 100644
--- a/sys/netpfil/ipfw/ip_fw_table.c
+++ b/sys/netpfil/ipfw/ip_fw_table.c
@@ -27,24 +27,18 @@ __FBSDID("$FreeBSD$");
 /*
- * Lookup table support for ipfw
+ * Lookup table support for ipfw.
  *
- * Lookup tables are implemented (at the moment) using the radix
- * tree used for routing tables. Tables store key-value entries, where
- * keys are network prefixes (addr/masklen), and values are integers.
- * As a degenerate case we can interpret keys as 32-bit integers
- * (with a /32 mask).
+ * This file contains handlers for all generic tables' operations:
+ * add/del/flush entries, list/dump tables etc..
  *
- * The table is protected by the IPFW lock even for manipulation coming
- * from userland, because operations are typically fast. 
+ * Table data modification is protected by both UH and runtimg lock + * while reading configuration/data is protected by UH lock. + * + * Lookup algorithms for all table types are located in ip_fw_table_algo.c */ #include "opt_ipfw.h" -#include "opt_inet.h" -#ifndef INET -#error IPFIREWALL requires INET. -#endif /* INET */ -#include "opt_inet6.h" #include <sys/param.h> #include <sys/systm.h> @@ -53,713 +47,3238 @@ __FBSDID("$FreeBSD$"); #include <sys/lock.h> #include <sys/rwlock.h> #include <sys/socket.h> +#include <sys/socketvar.h> #include <sys/queue.h> #include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ -#include <net/radix.h> -#include <net/route.h> -#include <net/vnet.h> #include <netinet/in.h> #include <netinet/ip_var.h> /* struct ipfw_rule_ref */ #include <netinet/ip_fw.h> #include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_table.h> -#ifdef MAC -#include <security/mac/mac_framework.h> -#endif - -static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); - -struct table_entry { - struct radix_node rn[2]; - struct sockaddr_in addr, mask; - u_int32_t value; + /* + * Table has the following `type` concepts: + * + * `no.type` represents lookup key type (cidr, ifp, uid, etc..) + * `vtype` represents table value type (currently U32) + * `ftype` (at the moment )is pure userland field helping to properly + * format value data e.g. "value is IPv4 nexthop" or "value is DSCP" + * or "value is port". 
+ * + */ +struct table_config { + struct named_object no; + uint8_t vtype; /* value type */ + uint8_t vftype; /* value format type */ + uint8_t tflags; /* type flags */ + uint8_t locked; /* 1 if locked from changes */ + uint32_t count; /* Number of records */ + uint32_t limit; /* Max number of records */ + uint8_t linked; /* 1 if already linked */ + uint8_t ochanged; /* used by set swapping */ + uint16_t spare1; + uint32_t spare2; + uint32_t ocount; /* used by set swapping */ + uint64_t gencnt; /* generation count */ + char tablename[64]; /* table name */ + struct table_algo *ta; /* Callbacks for given algo */ + void *astate; /* algorithm state */ + struct table_info ti; /* data to put to table_info */ }; -struct xaddr_iface { - uint8_t if_len; /* length of this struct */ - uint8_t pad[7]; /* Align name */ - char ifname[IF_NAMESIZE]; /* Interface name */ +struct tables_config { + struct namedobj_instance *namehash; + int algo_count; + struct table_algo *algo[256]; + struct table_algo *def_algo[IPFW_TABLE_MAXTYPE + 1]; }; -struct table_xentry { - struct radix_node rn[2]; - union { -#ifdef INET6 - struct sockaddr_in6 addr6; -#endif - struct xaddr_iface iface; - } a; - union { -#ifdef INET6 - struct sockaddr_in6 mask6; -#endif - struct xaddr_iface ifmask; - } m; - u_int32_t value; -}; +static struct table_config *find_table(struct namedobj_instance *ni, + struct tid_info *ti); +static struct table_config *alloc_table_config(struct ip_fw_chain *ch, + struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags, + uint8_t vtype); +static void free_table_config(struct namedobj_instance *ni, + struct table_config *tc); +static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, + char *aname, ipfw_xtable_info *i, struct table_config **ptc, + struct table_algo **pta, uint16_t *pkidx, int ref); +static void link_table(struct ip_fw_chain *ch, struct table_config *tc); +static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc); 
+static void free_table_state(void **state, void **xstate, uint8_t type); +static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, + struct sockopt_data *sd); +static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, + ipfw_xtable_info *i); +static int dump_table_tentry(void *e, void *arg); +static int dump_table_xentry(void *e, void *arg); + +static int ipfw_dump_table_v0(struct ip_fw_chain *ch, struct sockopt_data *sd); +static int ipfw_dump_table_v1(struct ip_fw_chain *ch, struct sockopt_data *sd); +static int ipfw_manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int ipfw_manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, + struct tid_info *b); + +static int check_table_space(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *ti, uint32_t count); +static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); + +static struct table_algo *find_table_algo(struct tables_config *tableconf, + struct tid_info *ti, char *name); + +static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti); +static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti); +static int classify_table_opcode(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype); + +#define CHAIN_TO_TCFG(chain) ((struct tables_config *)(chain)->tblcfg) +#define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash) +#define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k])) + +#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ /* - * The radix code expects addr and mask to be array of bytes, - * with the first byte being the length of the array. rn_inithead - * is called with the offset in bits of the lookup key within the - * array. 
If we use a sockaddr_in as the underlying type, - * sin_len is conveniently located at offset 0, sin_addr is at - * offset 4 and normally aligned. - * But for portability, let's avoid assumption and make the code explicit + * Checks if we're able to insert/update entry @tei into table + * w.r.t @tc limits. + * May alter @tei to indicate insertion error / insert + * options. + * + * Returns 0 if operation can be performed/ */ -#define KEY_LEN(v) *((uint8_t *)&(v)) -#define KEY_OFS (8*offsetof(struct sockaddr_in, sin_addr)) +static int +check_table_limit(struct table_config *tc, struct tentry_info *tei) +{ + + if (tc->limit == 0 || tc->count < tc->limit) + return (0); + + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { + /* Notify userland on error cause */ + tei->flags |= TEI_FLAGS_LIMIT; + return (EFBIG); + } + + /* + * We have UPDATE flag set. + * Permit updating record (if found), + * but restrict adding new one since we've + * already hit the limit. + */ + tei->flags |= TEI_FLAGS_DONTADD; + + return (0); +} + /* - * Do not require radix to compare more than actual IPv4/IPv6 address + * Convert algorithm callback return code into + * one of pre-defined states known by userland. 
*/ -#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) -#define KEY_LEN_INET6 (offsetof(struct sockaddr_in6, sin6_addr) + sizeof(struct in6_addr)) -#define KEY_LEN_IFACE (offsetof(struct xaddr_iface, ifname)) +static void +store_tei_result(struct tentry_info *tei, int do_add, int error, uint32_t num) +{ + int flag; -#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) -#define OFF_LEN_INET6 (8 * offsetof(struct sockaddr_in6, sin6_addr)) -#define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname)) + flag = 0; + switch (error) { + case 0: + if (do_add && num != 0) + flag = TEI_FLAGS_ADDED; + if (do_add == 0) + flag = TEI_FLAGS_DELETED; + break; + case ENOENT: + flag = TEI_FLAGS_NOTFOUND; + break; + case EEXIST: + flag = TEI_FLAGS_EXISTS; + break; + default: + flag = TEI_FLAGS_ERROR; + } -#ifdef INET6 -static inline void -ipv6_writemask(struct in6_addr *addr6, uint8_t mask) + tei->flags |= flag; +} + +/* + * Creates and references table with default parameters. + * Saves table config, algo and allocated kidx info @ptc, @pta and + * @pkidx if non-zero. + * Used for table auto-creation to support old binaries. + * + * Returns 0 on success. + */ +static int +create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti, + struct table_config **ptc, struct table_algo **pta, uint16_t *pkidx) { - uint32_t *cp; + ipfw_xtable_info xi; + int error; + + memset(&xi, 0, sizeof(xi)); + /* Set u32 as default value type for legacy clients */ + xi.vtype = IPFW_VTYPE_U32; - for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) - *cp++ = 0xFFFFFFFF; - *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); + error = create_table_internal(ch, ti, NULL, &xi, ptc, pta, pkidx, 1); + if (error != 0) + return (error); + + return (0); } -#endif +/* + * Find and reference existing table optionally + * creating new one. + * + * Saves found table config/table algo into @ptc / @pta. 
+ * Returns 0 if table was found/created and referenced
+ * or non-zero return code.
+ */
+static int
+find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct tentry_info *tei, uint32_t count, int do_add,
+    struct table_config **ptc, struct table_algo **pta)
+{
+	struct namedobj_instance *ni;
+	struct table_config *tc;
+	struct table_algo *ta;
+	int error;
+
+	IPFW_UH_WLOCK(ch);
+
+	ni = CHAIN_TO_NI(ch);
+	tc = NULL;
+	ta = NULL;
+	if ((tc = find_table(ni, ti)) != NULL) {
+		/* check table type: existing table must match the requested one */
+		if (tc->no.type != ti->type) {
+			IPFW_UH_WUNLOCK(ch);
+			return (EINVAL);
+		}
+
+		if (tc->locked != 0) {	/* table is locked from changes */
+			IPFW_UH_WUNLOCK(ch);
+			return (EACCES);
+		}
+
+		/* Try to exit early on limit hit (single-entry adds only) */
+		if (do_add != 0 && count == 1 &&
+		    check_table_limit(tc, tei) != 0) {
+			IPFW_UH_WUNLOCK(ch);
+			return (EFBIG);
+		}
+
+		/* Reference and unlock */
+		tc->no.refcnt++;
+		ta = tc->ta;
+	}
+	IPFW_UH_WUNLOCK(ch);
+
+	if (tc == NULL) {
+		if (do_add == 0)	/* nothing to delete from a missing table */
+			return (ESRCH);
+
+		/* Compatibility mode: create new table for old clients */
+		if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
+			return (ESRCH);
+
+		/* Called after the UH lock was dropped; kidx is not requested */
+		error = create_table_compat(ch, ti, &tc, &ta, NULL);
+		if (error != 0)
+			return (error);
+
+		/* OK, now we've got referenced table. */
+	}
+
+	*ptc = tc;
+	*pta = ta;
+	return (0);
+}
+
+/*
+ * Rolls back already @added to @tc entries using state array @ta_buf_m.
+ * Assume the following layout:
+ * 1) ADD state (ta_buf_m[0] ... ta_buf_m[added - 1]) for handling update cases
+ * 2) DEL state (ta_buf_m[count] ... 
t_buf_m[count + added - 1]) + * for storing deleted state + */ +static void +rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m, + uint32_t count, uint32_t added) +{ + struct table_algo *ta; + struct tentry_info *ptei; + caddr_t v, vv; + size_t ta_buf_sz; + int error, i; + uint32_t num; + + IPFW_UH_WLOCK_ASSERT(ch); + + ta = tc->ta; + ta_buf_sz = ta->ta_buf_size; + v = ta_buf_m; + vv = v + count * ta_buf_sz; + for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) { + ptei = &tei[i]; + if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) { + + /* + * We have old value stored by previous + * call in @ptei->value. Do add once again + * to restore it. + */ + error = ta->add(tc->astate, tinfo, ptei, v, &num); + KASSERT(error == 0, ("rollback UPDATE fail")); + KASSERT(num == 0, ("rollback UPDATE fail2")); + continue; + } + + error = ta->prepare_del(ch, ptei, vv); + KASSERT(error == 0, ("pre-rollback INSERT failed")); + error = ta->del(tc->astate, tinfo, ptei, vv, &num); + KASSERT(error == 0, ("rollback INSERT failed")); + tc->count -= num; + } +} + +/* + * Prepares add/del state for all @count entries in @tei. + * Uses either stack buffer (@ta_buf) or allocates a new one. + * Stores pointer to allocated buffer back to @ta_buf. + * + * Returns 0 on success. 
+ */ +static int +prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, + struct tentry_info *tei, uint32_t count, int do_add, caddr_t *ta_buf) +{ + caddr_t ta_buf_m, v; + size_t ta_buf_sz, sz; + struct tentry_info *ptei; + int error, i; + + error = 0; + ta_buf_sz = ta->ta_buf_size; + if (count == 1) { + /* Sigle add/delete, use on-stack buffer */ + memset(*ta_buf, 0, TA_BUF_SZ); + ta_buf_m = *ta_buf; + } else { + + /* + * Multiple adds/deletes, allocate larger buffer + * + * Note we need 2xcount buffer for add case: + * we have hold both ADD state + * and DELETE state (this may be needed + * if we need to rollback all changes) + */ + sz = count * ta_buf_sz; + ta_buf_m = malloc((do_add != 0) ? sz * 2 : sz, M_TEMP, + M_WAITOK | M_ZERO); + } + + v = ta_buf_m; + for (i = 0; i < count; i++, v += ta_buf_sz) { + ptei = &tei[i]; + error = (do_add != 0) ? + ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v); + + /* + * Some syntax error (incorrect mask, or address, or + * anything). Return error regardless of atomicity + * settings. + */ + if (error != 0) + break; + } + + *ta_buf = ta_buf_m; + return (error); +} + +/* + * Flushes allocated state for each @count entries in @tei. + * Frees @ta_buf_m if differs from stack buffer @ta_buf. + */ +static void +flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, + struct tentry_info *tei, uint32_t count, int do_add, int rollback, + caddr_t ta_buf_m, caddr_t ta_buf) +{ + caddr_t v; + size_t ta_buf_sz; + int i; + + ta_buf_sz = ta->ta_buf_size; + + /* Run cleaning callback anyway */ + v = ta_buf_m; + for (i = 0; i < count; i++, v += ta_buf_sz) + ta->flush_entry(ch, &tei[i], v); + + /* Clean up "deleted" state in case of rollback */ + if (rollback != 0) { + v = ta_buf_m + count * ta_buf_sz; + for (i = 0; i < count; i++, v += ta_buf_sz) + ta->flush_entry(ch, &tei[i], v); + } + + if (ta_buf_m != ta_buf) + free(ta_buf_m, M_TEMP); +} + +/* + * Adds/updates one or more entries in table @ti. 
+ * Function references @ti first to ensure table won't + * disappear or change its type. + * After that, prepare_add callback is called for each @tei entry. + * Next, we try to add each entry under UH+WHLOCK + * using add() callback. + * Finally, we free all state by calling flush_entry callback + * for each @tei. + * + * Returns 0 on success. + */ int -ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type, uint32_t value) -{ - struct radix_node_head *rnh, **rnh_ptr; - struct table_entry *ent; - struct table_xentry *xent; - struct radix_node *rn; - in_addr_t addr; - int offset; - void *ent_ptr; - struct sockaddr *addr_ptr, *mask_ptr; - char c; - - if (tbl >= V_fw_tables_max) - return (EINVAL); +add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint8_t flags, uint32_t count) +{ + struct table_config *tc; + struct table_algo *ta; + uint16_t kidx; + int error, first_error, i, rollback; + uint32_t num, numadd; + struct tentry_info *ptei; + char ta_buf[TA_BUF_SZ]; + caddr_t ta_buf_m, v; - switch (type) { - case IPFW_TABLE_CIDR: - if (plen == sizeof(in_addr_t)) { -#ifdef INET - /* IPv4 case */ - if (mlen > 32) - return (EINVAL); - ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); - ent->value = value; - /* Set 'total' structure length */ - KEY_LEN(ent->addr) = KEY_LEN_INET; - KEY_LEN(ent->mask) = KEY_LEN_INET; - /* Set offset of IPv4 address in bits */ - offset = OFF_LEN_INET; - ent->mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); - addr = *((in_addr_t *)paddr); - ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; - /* Set pointers */ - rnh_ptr = &ch->tables[tbl]; - ent_ptr = ent; - addr_ptr = (struct sockaddr *)&ent->addr; - mask_ptr = (struct sockaddr *)&ent->mask; -#endif -#ifdef INET6 - } else if (plen == sizeof(struct in6_addr)) { - /* IPv6 case */ - if (mlen > 128) - return (EINVAL); - xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); - xent->value = value; - /* Set 'total' structure length */ - KEY_LEN(xent->a.addr6) = KEY_LEN_INET6; - KEY_LEN(xent->m.mask6) = KEY_LEN_INET6; - /* Set offset of IPv6 address in bits */ - offset = OFF_LEN_INET6; - ipv6_writemask(&xent->m.mask6.sin6_addr, mlen); - memcpy(&xent->a.addr6.sin6_addr, paddr, sizeof(struct in6_addr)); - APPLY_MASK(&xent->a.addr6.sin6_addr, &xent->m.mask6.sin6_addr); - /* Set pointers */ - rnh_ptr = &ch->xtables[tbl]; - ent_ptr = xent; - addr_ptr = (struct sockaddr *)&xent->a.addr6; - mask_ptr = (struct sockaddr *)&xent->m.mask6; -#endif - } else { - /* Unknown CIDR type */ - return (EINVAL); + /* + * Find and reference existing table. + */ + if ((error = find_ref_table(ch, ti, tei, count, 1, &tc, &ta)) != 0) + return (error); + + /* Allocate memory and prepare record(s) */ + rollback = 0; + /* Pass stack buffer by default */ + ta_buf_m = ta_buf; + error = prepare_batch_buffer(ch, ta, tei, count, 1, &ta_buf_m); + if (error != 0) + goto cleanup; + + IPFW_UH_WLOCK(ch); + + /* Drop reference we've used in first search */ + tc->no.refcnt--; + + /* + * Ensure we are able to add all entries without additional + * memory allocations. May release/reacquire UH_WLOCK. + * check_table_space() guarantees us @tc won't disappear + * by referencing it internally. + */ + kidx = tc->no.kidx; + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), count); + if (error != 0) { + IPFW_UH_WUNLOCK(ch); + goto cleanup; + } + + /* + * Check if table algo is still the same. 
+ * (changed ta may be the result of table swap). + */ + if (ta != tc->ta) { + IPFW_UH_WUNLOCK(ch); + error = EINVAL; + goto cleanup; + } + + /* We've got valid table in @tc. Let's try to add data */ + kidx = tc->no.kidx; + ta = tc->ta; + numadd = 0; + first_error = 0; + + IPFW_WLOCK(ch); + + v = ta_buf_m; + for (i = 0; i < count; i++, v += ta->ta_buf_size) { + ptei = &tei[i]; + num = 0; + /* check limit before adding */ + if ((error = check_table_limit(tc, ptei)) == 0) { + error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), + ptei, v, &num); + /* Set status flag to inform userland */ + store_tei_result(ptei, 1, error, num); } - break; - - case IPFW_TABLE_INTERFACE: - /* Check if string is terminated */ - c = ((char *)paddr)[IF_NAMESIZE - 1]; - ((char *)paddr)[IF_NAMESIZE - 1] = '\0'; - if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0')) - return (EINVAL); + if (error == 0) { + /* Update number of records to ease limit checking */ + tc->count += num; + numadd += num; + continue; + } + + if (first_error == 0) + first_error = error; - /* Include last \0 into comparison */ - mlen++; - - xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); - xent->value = value; - /* Set 'total' structure length */ - KEY_LEN(xent->a.iface) = KEY_LEN_IFACE + mlen; - KEY_LEN(xent->m.ifmask) = KEY_LEN_IFACE + mlen; - /* Set offset of interface name in bits */ - offset = OFF_LEN_IFACE; - memcpy(xent->a.iface.ifname, paddr, mlen); - /* Assume direct match */ - /* TODO: Add interface pattern matching */ -#if 0 - memset(xent->m.ifmask.ifname, 0xFF, IF_NAMESIZE); - mask_ptr = (struct sockaddr *)&xent->m.ifmask; -#endif - /* Set pointers */ - rnh_ptr = &ch->xtables[tbl]; - ent_ptr = xent; - addr_ptr = (struct sockaddr *)&xent->a.iface; - mask_ptr = NULL; + /* + * Some error have happened. Check our atomicity + * settings: continue if atomicity is not required, + * rollback changes otherwise. 
+ */ + if ((flags & IPFW_CTF_ATOMIC) == 0) + continue; + + rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx), + tei, ta_buf_m, count, i); break; + } - default: - return (EINVAL); + IPFW_WUNLOCK(ch); + + /* Permit post-add algorithm grow/rehash. */ + if (numadd != 0) + check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); + + IPFW_UH_WUNLOCK(ch); + + /* Return first error to user, if any */ + error = first_error; + +cleanup: + flush_batch_buffer(ch, ta, tei, count, 1, rollback, ta_buf_m, ta_buf); + + return (error); +} + +/* + * Deletes one or more entries in table @ti. + * + * Returns 0 on success. + */ +int +del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint8_t flags, uint32_t count) +{ + struct table_config *tc; + struct table_algo *ta; + struct tentry_info *ptei; + uint16_t kidx; + int error, first_error, i; + uint32_t num, numdel; + char ta_buf[TA_BUF_SZ]; + caddr_t ta_buf_m, v; + + /* + * Find and reference existing table. + */ + if ((error = find_ref_table(ch, ti, tei, count, 0, &tc, &ta)) != 0) + return (error); + + /* Allocate memory and prepare record(s) */ + /* Pass stack buffer by default */ + ta_buf_m = ta_buf; + error = prepare_batch_buffer(ch, ta, tei, count, 0, &ta_buf_m); + if (error != 0) + goto cleanup; + + IPFW_UH_WLOCK(ch); + + /* Drop reference we've used in first search */ + tc->no.refcnt--; + + /* + * Check if table algo is still the same. + * (changed ta may be the result of table swap). 
+ */ + if (ta != tc->ta) { + IPFW_UH_WUNLOCK(ch); + error = EINVAL; + goto cleanup; } + kidx = tc->no.kidx; + numdel = 0; + first_error = 0; + IPFW_WLOCK(ch); + v = ta_buf_m; + for (i = 0; i < count; i++, v += ta->ta_buf_size) { + ptei = &tei[i]; + num = 0; + error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, + &num); + /* Save state for userland */ + store_tei_result(ptei, 0, error, num); + if (error != 0 && first_error == 0) + first_error = error; + tc->count -= num; + numdel += num; + } + IPFW_WUNLOCK(ch); - /* Check if tabletype is valid */ - if ((ch->tabletype[tbl] != 0) && (ch->tabletype[tbl] != type)) { - IPFW_WUNLOCK(ch); - free(ent_ptr, M_IPFW_TBL); - return (EINVAL); + if (numdel != 0) { + /* Run post-del hook to permit shrinking */ + error = check_table_space(ch, tc, KIDX_TO_TI(ch, kidx), 0); } - /* Check if radix tree exists */ - if ((rnh = *rnh_ptr) == NULL) { - IPFW_WUNLOCK(ch); - /* Create radix for a new table */ - if (!rn_inithead((void **)&rnh, offset)) { - free(ent_ptr, M_IPFW_TBL); - return (ENOMEM); + IPFW_UH_WUNLOCK(ch); + + /* Return first error to user, if any */ + error = first_error; + +cleanup: + flush_batch_buffer(ch, ta, tei, count, 0, 0, ta_buf_m, ta_buf); + + return (error); +} + +/* + * Ensure that table @tc has enough space to add @count entries without + * need for reallocation. + * + * Callbacks order: + * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize. + * + * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. + * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage + * 3) modify (UH_WLOCK + WLOCK) - switch pointers + * 4) flush_modify (UH_WLOCK) - free state, if needed + * + * Returns 0 on success. 
+ */ +static int +check_table_space(struct ip_fw_chain *ch, struct table_config *tc, + struct table_info *ti, uint32_t count) +{ + struct table_algo *ta; + uint64_t pflags; + char ta_buf[TA_BUF_SZ]; + int error; + + IPFW_UH_WLOCK_ASSERT(ch); + + error = 0; + ta = tc->ta; + /* Acquire reference not to loose @tc between locks/unlocks */ + tc->no.refcnt++; + + /* + * TODO: think about avoiding race between large add/large delete + * operation on algorithm which implements shrinking along with + * growing. + */ + while (true) { + pflags = 0; + if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { + error = 0; + break; } - IPFW_WLOCK(ch); - if (*rnh_ptr != NULL) { - /* Tree is already attached by other thread */ - rn_detachhead((void **)&rnh); - rnh = *rnh_ptr; - /* Check table type another time */ - if (ch->tabletype[tbl] != type) { - IPFW_WUNLOCK(ch); - free(ent_ptr, M_IPFW_TBL); - return (EINVAL); - } - } else { - *rnh_ptr = rnh; - /* - * Set table type. It can be set already - * (if we have IPv6-only table) but setting - * it another time does not hurt + /* We have to shrink/grow table */ + IPFW_UH_WUNLOCK(ch); + + memset(&ta_buf, 0, sizeof(ta_buf)); + if ((error = ta->prepare_mod(ta_buf, &pflags)) != 0) { + IPFW_UH_WLOCK(ch); + break; + } + + IPFW_UH_WLOCK(ch); + + /* Check if we still need to alter table */ + ti = KIDX_TO_TI(ch, tc->no.kidx); + if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { + IPFW_UH_WUNLOCK(ch); + + /* + * Other thread has already performed resize. + * Flush our state and return. 
*/ - ch->tabletype[tbl] = type; + ta->flush_mod(ta_buf); + break; } + + error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); + if (error == 0) { + /* Do actual modification */ + IPFW_WLOCK(ch); + ta->modify(tc->astate, ti, ta_buf, pflags); + IPFW_WUNLOCK(ch); + } + + /* Anyway, flush data and retry */ + ta->flush_mod(ta_buf); } - rn = rnh->rnh_addaddr(addr_ptr, mask_ptr, rnh, ent_ptr); - IPFW_WUNLOCK(ch); + tc->no.refcnt--; + return (error); +} - if (rn == NULL) { - free(ent_ptr, M_IPFW_TBL); - return (EEXIST); +/* + * Selects appropriate table operation handler + * depending on opcode version. + */ +int +ipfw_manage_table_ent(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + int error; + + switch (op3->version) { + case 0: + error = ipfw_manage_table_ent_v0(ch, op3, sd); + break; + case 1: + error = ipfw_manage_table_ent_v1(ch, op3, sd); + break; + default: + error = ENOTSUP; } - return (0); + + return (error); } -int -ipfw_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint8_t plen, uint8_t mlen, uint8_t type) +/* + * Adds or deletes record in table. + * Data layout (v0): + * Request: [ ip_fw3_opheader ipfw_table_xentry ] + * + * Returns 0 on success + */ +static int +ipfw_manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - struct radix_node_head *rnh, **rnh_ptr; - struct table_entry *ent; - in_addr_t addr; - struct sockaddr_in sa, mask; - struct sockaddr *sa_ptr, *mask_ptr; - char c; + ipfw_table_xentry *xent; + struct tentry_info tei; + struct tid_info ti; + int error, hdrlen, read; - if (tbl >= V_fw_tables_max) + hdrlen = offsetof(ipfw_table_xentry, k); + + /* Check minimum header size */ + if (sd->valsize < (sizeof(*op3) + hdrlen)) return (EINVAL); - switch (type) { - case IPFW_TABLE_CIDR: - if (plen == sizeof(in_addr_t)) { - /* Set 'total' structure length */ - KEY_LEN(sa) = KEY_LEN_INET; - KEY_LEN(mask) = KEY_LEN_INET; - mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); - addr = *((in_addr_t *)paddr); - sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; - rnh_ptr = &ch->tables[tbl]; - sa_ptr = (struct sockaddr *)&sa; - mask_ptr = (struct sockaddr *)&mask; -#ifdef INET6 - } else if (plen == sizeof(struct in6_addr)) { - /* IPv6 case */ - if (mlen > 128) - return (EINVAL); - struct sockaddr_in6 sa6, mask6; - memset(&sa6, 0, sizeof(struct sockaddr_in6)); - memset(&mask6, 0, sizeof(struct sockaddr_in6)); - /* Set 'total' structure length */ - KEY_LEN(sa6) = KEY_LEN_INET6; - KEY_LEN(mask6) = KEY_LEN_INET6; - ipv6_writemask(&mask6.sin6_addr, mlen); - memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr)); - APPLY_MASK(&sa6.sin6_addr, &mask6.sin6_addr); - rnh_ptr = &ch->xtables[tbl]; - sa_ptr = (struct sockaddr *)&sa6; - mask_ptr = (struct sockaddr *)&mask6; -#endif - } else { - /* Unknown CIDR type */ - return (EINVAL); - } - break; + read = sizeof(ip_fw3_opheader); - case IPFW_TABLE_INTERFACE: - /* Check if string is terminated */ - c = ((char *)paddr)[IF_NAMESIZE - 1]; - ((char *)paddr)[IF_NAMESIZE - 1] = '\0'; - if (((mlen = strlen((char *)paddr)) == IF_NAMESIZE - 1) && (c != '\0')) - return (EINVAL); + /* Check if xentry len field is valid */ + xent = (ipfw_table_xentry *)(op3 + 1); + if (xent->len < hdrlen || xent->len + read > sd->valsize) + return (EINVAL); + + memset(&tei, 0, sizeof(tei)); + tei.paddr = &xent->k; + tei.masklen = xent->masklen; + tei.value = xent->value; + /* Old requests compability */ + tei.flags = TEI_FLAGS_COMPAT; + if (xent->type == IPFW_TABLE_CIDR) { + if (xent->len - hdrlen == sizeof(in_addr_t)) + tei.subtype = AF_INET; + else + tei.subtype = AF_INET6; + } - struct xaddr_iface ifname, ifmask; - memset(&ifname, 0, sizeof(ifname)); - - /* Include last \0 into comparison */ - mlen++; - - /* Set 'total' structure length */ - KEY_LEN(ifname) = KEY_LEN_IFACE + mlen; - KEY_LEN(ifmask) = KEY_LEN_IFACE + mlen; - /* Assume direct match */ - /* FIXME: Add interface pattern matching */ 
-#if 0 - memset(ifmask.ifname, 0xFF, IF_NAMESIZE); - mask_ptr = (struct sockaddr *)&ifmask; -#endif - mask_ptr = NULL; - memcpy(ifname.ifname, paddr, mlen); - /* Set pointers */ - rnh_ptr = &ch->xtables[tbl]; - sa_ptr = (struct sockaddr *)&ifname; + memset(&ti, 0, sizeof(ti)); + ti.uidx = xent->tbl; + ti.type = xent->type; - break; + error = (op3->opcode == IP_FW_TABLE_XADD) ? + add_table_entry(ch, &ti, &tei, 0, 1) : + del_table_entry(ch, &ti, &tei, 0, 1); - default: + return (error); +} + +/* + * Adds or deletes record in table. + * Data layout (v1)(current): + * Request: [ ipfw_obj_header + * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ] + * ] + * + * Returns 0 on success + */ +static int +ipfw_manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_tentry *tent, *ptent; + ipfw_obj_ctlv *ctlv; + ipfw_obj_header *oh; + struct tentry_info *ptei, tei, *tei_buf; + struct tid_info ti; + int error, i, kidx, read; + + /* Check minimum header size */ + if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv))) + return (EINVAL); + + /* Check if passed data is too long */ + if (sd->valsize != sd->kavail) + return (EINVAL); + + oh = (ipfw_obj_header *)sd->kbuf; + + /* Basic length checks for TLVs */ + if (oh->ntlv.head.length != sizeof(oh->ntlv)) + return (EINVAL); + + read = sizeof(*oh); + + ctlv = (ipfw_obj_ctlv *)(oh + 1); + if (ctlv->head.length + read != sd->valsize) return (EINVAL); + + read += sizeof(*ctlv); + tent = (ipfw_obj_tentry *)(ctlv + 1); + if (ctlv->count * sizeof(*tent) + read != sd->valsize) + return (EINVAL); + + if (ctlv->count == 0) + return (0); + + /* + * Mark entire buffer as "read". + * This instructs sopt api write it back + * after function return. 
+ */ + ipfw_get_sopt_header(sd, sd->valsize); + + /* Perform basic checks for each entry */ + ptent = tent; + kidx = tent->idx; + for (i = 0; i < ctlv->count; i++, ptent++) { + if (ptent->head.length != sizeof(*ptent)) + return (EINVAL); + if (ptent->idx != kidx) + return (ENOTSUP); } - IPFW_WLOCK(ch); - if ((rnh = *rnh_ptr) == NULL) { - IPFW_WUNLOCK(ch); + /* Convert data into kernel request objects */ + objheader_to_ti(oh, &ti); + ti.type = oh->ntlv.type; + ti.uidx = kidx; + + /* Use on-stack buffer for single add/del */ + if (ctlv->count == 1) { + memset(&tei, 0, sizeof(tei)); + tei_buf = &tei; + } else + tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP, + M_WAITOK | M_ZERO); + + ptei = tei_buf; + ptent = tent; + for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { + ptei->paddr = &ptent->k; + ptei->subtype = ptent->subtype; + ptei->masklen = ptent->masklen; + if (ptent->head.flags & IPFW_TF_UPDATE) + ptei->flags |= TEI_FLAGS_UPDATE; + ptei->value = ptent->value; + } + + error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ? + add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) : + del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count); + + /* Translate result back to userland */ + ptei = tei_buf; + ptent = tent; + for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { + if (ptei->flags & TEI_FLAGS_ADDED) + ptent->result = IPFW_TR_ADDED; + else if (ptei->flags & TEI_FLAGS_DELETED) + ptent->result = IPFW_TR_DELETED; + else if (ptei->flags & TEI_FLAGS_UPDATED) + ptent->result = IPFW_TR_UPDATED; + else if (ptei->flags & TEI_FLAGS_LIMIT) + ptent->result = IPFW_TR_LIMIT; + else if (ptei->flags & TEI_FLAGS_ERROR) + ptent->result = IPFW_TR_ERROR; + else if (ptei->flags & TEI_FLAGS_NOTFOUND) + ptent->result = IPFW_TR_NOTFOUND; + else if (ptei->flags & TEI_FLAGS_EXISTS) + ptent->result = IPFW_TR_EXISTS; + } + + if (tei_buf != &tei) + free(tei_buf, M_TEMP); + + return (error); +} + +/* + * Looks up an entry in given table. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_obj_tentry ] + * Reply: [ ipfw_obj_header ipfw_obj_tentry ] + * + * Returns 0 on success + */ +int +ipfw_find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_tentry *tent; + ipfw_obj_header *oh; + struct tid_info ti; + struct table_config *tc; + struct table_algo *ta; + struct table_info *kti; + struct namedobj_instance *ni; + int error; + size_t sz; + + /* Check minimum header size */ + sz = sizeof(*oh) + sizeof(*tent); + if (sd->valsize != sz) + return (EINVAL); + + oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + tent = (ipfw_obj_tentry *)(oh + 1); + + /* Basic length checks for TLVs */ + if (oh->ntlv.head.length != sizeof(oh->ntlv)) + return (EINVAL); + + objheader_to_ti(oh, &ti); + ti.type = oh->ntlv.type; + ti.uidx = tent->idx; + + IPFW_UH_RLOCK(ch); + ni = CHAIN_TO_NI(ch); + + /* + * Find existing table and check its type . + */ + ta = NULL; + if ((tc = find_table(ni, &ti)) == NULL) { + IPFW_UH_RUNLOCK(ch); return (ESRCH); } - if (ch->tabletype[tbl] != type) { - IPFW_WUNLOCK(ch); + /* check table type */ + if (tc->no.type != ti.type) { + IPFW_UH_RUNLOCK(ch); return (EINVAL); } - ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh); - IPFW_WUNLOCK(ch); + kti = KIDX_TO_TI(ch, tc->no.kidx); + ta = tc->ta; - if (ent == NULL) - return (ESRCH); + if (ta->find_tentry == NULL) + return (ENOTSUP); - free(ent, M_IPFW_TBL); - return (0); + error = ta->find_tentry(tc->astate, kti, tent); + + IPFW_UH_RUNLOCK(ch); + + return (error); } -static int -flush_table_entry(struct radix_node *rn, void *arg) +/* + * Flushes all entries or destroys given table. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +int +ipfw_flush_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - struct radix_node_head * const rnh = arg; - struct table_entry *ent; + int error; + struct _ipfw_obj_header *oh; + struct tid_info ti; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + + oh = (struct _ipfw_obj_header *)op3; + objheader_to_ti(oh, &ti); + + if (op3->opcode == IP_FW_TABLE_XDESTROY) + error = destroy_table(ch, &ti); + else if (op3->opcode == IP_FW_TABLE_XFLUSH) + error = flush_table(ch, &ti); + else + return (ENOTSUP); + + return (error); +} + +/* + * Flushes given table. + * + * Function create new table instance with the same + * parameters, swaps it with old one and + * flushes state without holding any locks. + * + * Returns 0 on success. + */ +int +flush_table(struct ip_fw_chain *ch, struct tid_info *ti) +{ + struct namedobj_instance *ni; + struct table_config *tc; + struct table_algo *ta; + struct table_info ti_old, ti_new, *tablestate; + void *astate_old, *astate_new; + char algostate[64], *pstate; + int error; + uint16_t kidx; + uint8_t tflags; + + /* + * Stage 1: save table algoritm. + * Reference found table to ensure it won't disappear. + */ + IPFW_UH_WLOCK(ch); + ni = CHAIN_TO_NI(ch); + if ((tc = find_table(ni, ti)) == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + ta = tc->ta; + tc->no.refcnt++; + /* Save startup algo parameters */ + if (ta->print_config != NULL) { + ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx), + algostate, sizeof(algostate)); + pstate = algostate; + } else + pstate = NULL; + tflags = tc->tflags; + IPFW_UH_WUNLOCK(ch); + + /* + * Stage 2: allocate new table instance using same algo. 
+ */ + memset(&ti_new, 0, sizeof(struct table_info)); + if ((error = ta->init(ch, &astate_new, &ti_new, pstate, tflags)) != 0) { + IPFW_UH_WLOCK(ch); + tc->no.refcnt--; + IPFW_UH_WUNLOCK(ch); + return (error); + } + + /* + * Stage 3: swap old state pointers with newly-allocated ones. + * Decrease refcount. + */ + IPFW_UH_WLOCK(ch); + + ni = CHAIN_TO_NI(ch); + kidx = tc->no.kidx; + tablestate = (struct table_info *)ch->tablestate; + + IPFW_WLOCK(ch); + ti_old = tablestate[kidx]; + tablestate[kidx] = ti_new; + IPFW_WUNLOCK(ch); + + astate_old = tc->astate; + tc->astate = astate_new; + tc->ti = ti_new; + tc->count = 0; + tc->no.refcnt--; + + IPFW_UH_WUNLOCK(ch); + + /* + * Stage 4: perform real flush. + */ + ta->destroy(astate_old, &ti_old); - ent = (struct table_entry *) - rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); - if (ent != NULL) - free(ent, M_IPFW_TBL); return (0); } +/* + * Swaps two tables. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_obj_ntlv ] + * + * Returns 0 on success + */ int -ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl) +ipfw_swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - struct radix_node_head *rnh, *xrnh; + int error; + struct _ipfw_obj_header *oh; + struct tid_info ti_a, ti_b; - if (tbl >= V_fw_tables_max) + if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv)) return (EINVAL); + oh = (struct _ipfw_obj_header *)op3; + ntlv_to_ti(&oh->ntlv, &ti_a); + ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b); + + error = swap_tables(ch, &ti_a, &ti_b); + + return (error); +} + +/* + * Swaps two tables of the same type/valtype. + * + * Checks if tables are compatible and limits + * permits swap, than actually perform swap. + * + * Each table consists of 2 different parts: + * config: + * @tc (with name, set, kidx) and rule bindings, which is "stable". 
+ * number of items + * table algo + * runtime: + * runtime data @ti (ch->tablestate) + * runtime cache in @tc + * algo-specific data (@tc->astate) + * + * So we switch: + * all runtime data + * number of items + * table algo + * + * After that we call @ti change handler for each table. + * + * Note that referencing @tc won't protect tc->ta from change. + * XXX: Do we need to restrict swap between locked tables? + * XXX: Do we need to exchange ftype? + * + * Returns 0 on success. + */ +static int +swap_tables(struct ip_fw_chain *ch, struct tid_info *a, + struct tid_info *b) +{ + struct namedobj_instance *ni; + struct table_config *tc_a, *tc_b; + struct table_algo *ta; + struct table_info ti, *tablestate; + void *astate; + uint32_t count; + /* - * We free both (IPv4 and extended) radix trees and - * clear table type here to permit table to be reused - * for different type without module reload + * Stage 1: find both tables and ensure they are of + * the same type. */ + IPFW_UH_WLOCK(ch); + ni = CHAIN_TO_NI(ch); + if ((tc_a = find_table(ni, a)) == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + if ((tc_b = find_table(ni, b)) == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + + /* It is very easy to swap between the same table */ + if (tc_a == tc_b) { + IPFW_UH_WUNLOCK(ch); + return (0); + } + + /* Check type and value are the same */ + if (tc_a->no.type != tc_b->no.type || tc_a->tflags != tc_b->tflags || + tc_a->vtype != tc_b->vtype) { + IPFW_UH_WUNLOCK(ch); + return (EINVAL); + } + + /* Check limits before swap */ + if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) || + (tc_b->limit != 0 && tc_a->count > tc_b->limit)) { + IPFW_UH_WUNLOCK(ch); + return (EFBIG); + } + + /* Everything is fine, prepare to swap */ + tablestate = (struct table_info *)ch->tablestate; + ti = tablestate[tc_a->no.kidx]; + ta = tc_a->ta; + astate = tc_a->astate; + count = tc_a->count; IPFW_WLOCK(ch); - /* Set IPv4 table pointer to zero */ - if ((rnh = ch->tables[tbl]) != NULL) 
- ch->tables[tbl] = NULL; - /* Set extended table pointer to zero */ - if ((xrnh = ch->xtables[tbl]) != NULL) - ch->xtables[tbl] = NULL; - /* Zero table type */ - ch->tabletype[tbl] = 0; + /* a <- b */ + tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx]; + tc_a->ta = tc_b->ta; + tc_a->astate = tc_b->astate; + tc_a->count = tc_b->count; + /* b <- a */ + tablestate[tc_b->no.kidx] = ti; + tc_b->ta = ta; + tc_b->astate = astate; + tc_b->count = count; IPFW_WUNLOCK(ch); - if (rnh != NULL) { - rnh->rnh_walktree(rnh, flush_table_entry, rnh); - rn_detachhead((void **)&rnh); + /* Ensure tc.ti copies are in sync */ + tc_a->ti = tablestate[tc_a->no.kidx]; + tc_b->ti = tablestate[tc_b->no.kidx]; + + /* Notify both tables on @ti change */ + if (tc_a->ta->change_ti != NULL) + tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]); + if (tc_b->ta->change_ti != NULL) + tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]); + + IPFW_UH_WUNLOCK(ch); + + return (0); +} + +/* + * Destroys table specified by @ti. 
+ * Data layout (v0)(current): + * Request: [ ip_fw3_opheader ] + * + * Returns 0 on success + */ +static int +destroy_table(struct ip_fw_chain *ch, struct tid_info *ti) +{ + struct namedobj_instance *ni; + struct table_config *tc; + + IPFW_UH_WLOCK(ch); + + ni = CHAIN_TO_NI(ch); + if ((tc = find_table(ni, ti)) == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); } - if (xrnh != NULL) { - xrnh->rnh_walktree(xrnh, flush_table_entry, xrnh); - rn_detachhead((void **)&xrnh); + /* Do not permit destroying referenced tables */ + if (tc->no.refcnt > 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); } + IPFW_WLOCK(ch); + unlink_table(ch, tc); + IPFW_WUNLOCK(ch); + + /* Free obj index */ + if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0) + printf("Error unlinking kidx %d from table %s\n", + tc->no.kidx, tc->tablename); + + IPFW_UH_WUNLOCK(ch); + + free_table_config(ni, tc); + return (0); } +static void +destroy_table_locked(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + + unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no); + if (ipfw_objhash_free_idx(ni, no->kidx) != 0) + printf("Error unlinking kidx %d from table %s\n", + no->kidx, no->name); + free_table_config(ni, (struct table_config *)no); +} + +/* + * Shuts tables module down. + */ void ipfw_destroy_tables(struct ip_fw_chain *ch) { - uint16_t tbl; - /* Flush all tables */ - for (tbl = 0; tbl < V_fw_tables_max; tbl++) - ipfw_flush_table(ch, tbl); + /* Remove all tables from working set */ + IPFW_UH_WLOCK(ch); + IPFW_WLOCK(ch); + ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch); + IPFW_WUNLOCK(ch); + IPFW_UH_WUNLOCK(ch); /* Free pointers itself */ - free(ch->tables, M_IPFW); - free(ch->xtables, M_IPFW); - free(ch->tabletype, M_IPFW); + free(ch->tablestate, M_IPFW); + + ipfw_table_algo_destroy(ch); + + ipfw_objhash_destroy(CHAIN_TO_NI(ch)); + free(CHAIN_TO_TCFG(ch), M_IPFW); } +/* + * Starts tables module. 
+ */ int ipfw_init_tables(struct ip_fw_chain *ch) { + struct tables_config *tcfg; + /* Allocate pointers */ - ch->tables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO); - ch->xtables = malloc(V_fw_tables_max * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO); - ch->tabletype = malloc(V_fw_tables_max * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO); + ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), + M_IPFW, M_WAITOK | M_ZERO); + + tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); + tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); + ch->tblcfg = tcfg; + + ipfw_table_algo_init(ch); + return (0); } +/* + * Grow tables index. + * + * Returns 0 on success. + */ int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) { - struct radix_node_head **tables, **xtables, *rnh; - struct radix_node_head **tables_old, **xtables_old; - uint8_t *tabletype, *tabletype_old; unsigned int ntables_old, tbl; + struct namedobj_instance *ni; + void *new_idx, *old_tablestate, *tablestate; + struct table_info *ti; + struct table_config *tc; + int i, new_blocks; /* Check new value for validity */ if (ntables > IPFW_TABLES_MAX) ntables = IPFW_TABLES_MAX; /* Allocate new pointers */ - tables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO); - xtables = malloc(ntables * sizeof(void *), M_IPFW, M_WAITOK | M_ZERO); - tabletype = malloc(ntables * sizeof(uint8_t), M_IPFW, M_WAITOK | M_ZERO); + tablestate = malloc(ntables * sizeof(struct table_info), + M_IPFW, M_WAITOK | M_ZERO); - IPFW_WLOCK(ch); + ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks); + + IPFW_UH_WLOCK(ch); tbl = (ntables >= V_fw_tables_max) ? 
V_fw_tables_max : ntables; + ni = CHAIN_TO_NI(ch); + + /* Temporary restrict decreasing max_tables */ + if (ntables < V_fw_tables_max) { + + /* + * FIXME: Check if we really can shrink + */ + IPFW_UH_WUNLOCK(ch); + return (EINVAL); + } + + /* Copy table info/indices */ + memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl); + ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks); - /* Copy old table pointers */ - memcpy(tables, ch->tables, sizeof(void *) * tbl); - memcpy(xtables, ch->xtables, sizeof(void *) * tbl); - memcpy(tabletype, ch->tabletype, sizeof(uint8_t) * tbl); + IPFW_WLOCK(ch); - /* Change pointers and number of tables */ - tables_old = ch->tables; - xtables_old = ch->xtables; - tabletype_old = ch->tabletype; - ch->tables = tables; - ch->xtables = xtables; - ch->tabletype = tabletype; + /* Change pointers */ + old_tablestate = ch->tablestate; + ch->tablestate = tablestate; + ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks); ntables_old = V_fw_tables_max; V_fw_tables_max = ntables; IPFW_WUNLOCK(ch); - /* Check if we need to destroy radix trees */ - if (ntables < ntables_old) { - for (tbl = ntables; tbl < ntables_old; tbl++) { - if ((rnh = tables_old[tbl]) != NULL) { - rnh->rnh_walktree(rnh, flush_table_entry, rnh); - rn_detachhead((void **)&rnh); - } + /* Notify all consumers that their @ti pointer has changed */ + ti = (struct table_info *)ch->tablestate; + for (i = 0; i < tbl; i++, ti++) { + if (ti->lookup == NULL) + continue; + tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i); + if (tc == NULL || tc->ta->change_ti == NULL) + continue; + + tc->ta->change_ti(tc->astate, ti); + } + + IPFW_UH_WUNLOCK(ch); - if ((rnh = xtables_old[tbl]) != NULL) { - rnh->rnh_walktree(rnh, flush_table_entry, rnh); - rn_detachhead((void **)&rnh); + /* Free old pointers */ + free(old_tablestate, M_IPFW); + ipfw_objhash_bitmap_free(new_idx, new_blocks); + + return (0); +} + +/* + * Switch between "set 0" and "rule's set" table binding, + * Check 
all ruleset bindings and permits changing + * IFF each binding has both rule AND table in default set (set 0). + * + * Returns 0 on success. + */ +int +ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets) +{ + struct namedobj_instance *ni; + struct named_object *no; + struct ip_fw *rule; + ipfw_insn *cmd; + int cmdlen, i, l; + uint16_t kidx; + uint8_t type; + + IPFW_UH_WLOCK(ch); + + if (V_fw_tables_sets == sets) { + IPFW_UH_WUNLOCK(ch); + return (0); + } + + ni = CHAIN_TO_NI(ch); + + /* + * Scan all rules and examine tables opcodes. + */ + for (i = 0; i < ch->n_rules; i++) { + rule = ch->map[i]; + + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + if (classify_table_opcode(cmd, &kidx, &type) != 0) + continue; + + no = ipfw_objhash_lookup_kidx(ni, kidx); + + /* Check if both table object and rule has the set 0 */ + if (no->set != 0 || rule->set != 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); } + } } + V_fw_tables_sets = sets; - /* Free old pointers */ - free(tables_old, M_IPFW); - free(xtables_old, M_IPFW); - free(tabletype_old, M_IPFW); + IPFW_UH_WUNLOCK(ch); return (0); } +/* + * Lookup an IP @addr in table @tbl. + * Stores found value in @val. + * + * Returns 1 if @addr was found. + */ int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val) { - struct radix_node_head *rnh; - struct table_entry *ent; - struct sockaddr_in sa; + struct table_info *ti; - if (tbl >= V_fw_tables_max) - return (0); - if ((rnh = ch->tables[tbl]) == NULL) - return (0); - KEY_LEN(sa) = KEY_LEN_INET; - sa.sin_addr.s_addr = addr; - ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh)); - if (ent != NULL) { - *val = ent->value; - return (1); + ti = KIDX_TO_TI(ch, tbl); + + return (ti->lookup(ti, &addr, sizeof(in_addr_t), val)); +} + +/* + * Lookup an arbtrary key @paddr of legth @plen in table @tbl. + * Stores found value in @val. 
+ * + * Returns 1 if key was found. + */ +int +ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, + void *paddr, uint32_t *val) +{ + struct table_info *ti; + + ti = KIDX_TO_TI(ch, tbl); + + return (ti->lookup(ti, paddr, plen, val)); +} + +/* + * Info/List/dump support for tables. + * + */ + +/* + * High-level 'get' cmds sysctl handlers + */ + +/* + * Lists all tables currently available in kernel. + * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size + * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ] + * + * Returns 0 on success + */ +int +ipfw_list_tables(struct ip_fw_chain *ch, struct sockopt_data *sd) +{ + struct _ipfw_obj_lheader *olh; + int error; + + olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); + if (olh == NULL) + return (EINVAL); + if (sd->valsize < olh->size) + return (EINVAL); + + IPFW_UH_RLOCK(ch); + error = export_tables(ch, olh, sd); + IPFW_UH_RUNLOCK(ch); + + return (error); +} + +/* + * Store table info to buffer provided by @sd. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_xtable_info(empty)] + * Reply: [ ipfw_obj_header ipfw_xtable_info ] + * + * Returns 0 on success. + */ +int +ipfw_describe_table(struct ip_fw_chain *ch, struct sockopt_data *sd) +{ + struct _ipfw_obj_header *oh; + struct table_config *tc; + struct tid_info ti; + size_t sz; + + sz = sizeof(*oh) + sizeof(ipfw_xtable_info); + oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + if (oh == NULL) + return (EINVAL); + + objheader_to_ti(oh, &ti); + + IPFW_UH_RLOCK(ch); + if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); } + + export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1)); + IPFW_UH_RUNLOCK(ch); + return (0); } +/* + * Modifies existing table. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_xtable_info ] + * + * Returns 0 on success + */ int -ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, void *paddr, - uint32_t *val, int type) +ipfw_modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - struct radix_node_head *rnh; - struct table_xentry *xent; - struct sockaddr_in6 sa6; - struct xaddr_iface iface; + struct _ipfw_obj_header *oh; + ipfw_xtable_info *i; + char *tname; + struct tid_info ti; + struct namedobj_instance *ni; + struct table_config *tc; + + if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) + return (EINVAL); - if (tbl >= V_fw_tables_max) - return (0); - if ((rnh = ch->xtables[tbl]) == NULL) - return (0); + oh = (struct _ipfw_obj_header *)sd->kbuf; + i = (ipfw_xtable_info *)(oh + 1); - switch (type) { - case IPFW_TABLE_CIDR: - KEY_LEN(sa6) = KEY_LEN_INET6; - memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr)); - xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); - break; + /* + * Verify user-supplied strings. 
+ * Check for null-terminated/zero-length strings/ + */ + tname = oh->ntlv.name; + if (ipfw_check_table_name(tname) != 0) + return (EINVAL); - case IPFW_TABLE_INTERFACE: - KEY_LEN(iface) = KEY_LEN_IFACE + - strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1; - /* Assume direct match */ - /* FIXME: Add interface pattern matching */ - xent = (struct table_xentry *)(rnh->rnh_matchaddr(&iface, rnh)); - break; + objheader_to_ti(oh, &ti); + ti.type = i->type; - default: - return (0); + IPFW_UH_WLOCK(ch); + ni = CHAIN_TO_NI(ch); + if ((tc = find_table(ni, &ti)) == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); } + if ((i->mflags & IPFW_TMFLAGS_FTYPE) != 0) + tc->vftype = i->vftype; + if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0) + tc->limit = i->limit; + if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0) + tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0); + IPFW_UH_WUNLOCK(ch); - if (xent != NULL) { - *val = xent->value; - return (1); + return (0); +} + +/* + * Creates new table. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_xtable_info ] + * + * Returns 0 on success + */ +int +ipfw_create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + struct _ipfw_obj_header *oh; + ipfw_xtable_info *i; + char *tname, *aname; + struct tid_info ti; + struct namedobj_instance *ni; + struct table_config *tc; + + if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) + return (EINVAL); + + oh = (struct _ipfw_obj_header *)sd->kbuf; + i = (ipfw_xtable_info *)(oh + 1); + + /* + * Verify user-supplied strings. 
+ * Check for null-terminated/zero-length strings/ + */ + tname = oh->ntlv.name; + aname = i->algoname; + if (ipfw_check_table_name(tname) != 0 || + strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname)) + return (EINVAL); + + if (aname[0] == '\0') { + /* Use default algorithm */ + aname = NULL; + } + + objheader_to_ti(oh, &ti); + ti.type = i->type; + + ni = CHAIN_TO_NI(ch); + + IPFW_UH_RLOCK(ch); + if ((tc = find_table(ni, &ti)) != NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); } + IPFW_UH_RUNLOCK(ch); + + return (create_table_internal(ch, &ti, aname, i, NULL, NULL, NULL, 0)); +} + +/* + * Creates new table based on @ti and @aname. + * + * Relies on table name checking inside find_name_tlv() + * Assume @aname to be checked and valid. + * Stores allocated table config, used algo and kidx + * inside @ptc, @pta and @pkidx (if non-NULL). + * Reference created table if @compat is non-zero. + * + * Returns 0 on success. + */ +static int +create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, + char *aname, ipfw_xtable_info *i, struct table_config **ptc, + struct table_algo **pta, uint16_t *pkidx, int compat) +{ + struct namedobj_instance *ni; + struct table_config *tc, *tc_new, *tmp; + struct table_algo *ta; + uint16_t kidx; + + ni = CHAIN_TO_NI(ch); + + ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); + if (ta == NULL) + return (ENOTSUP); + + tc = alloc_table_config(ch, ti, ta, aname, i->tflags, i->vtype); + if (tc == NULL) + return (ENOMEM); + + tc->vftype = i->vftype; + tc->limit = i->limit; + tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0; + + IPFW_UH_WLOCK(ch); + + /* Check if table has been already created */ + tc_new = find_table(ni, ti); + if (tc_new != NULL) { + + /* + * Compat: do not fail if we're + * requesting to create existing table + * which has the same type / vtype + */ + if (compat == 0 || tc_new->no.type != tc->no.type || + tc_new->vtype != tc->vtype) { + IPFW_UH_WUNLOCK(ch); + free_table_config(ni, tc); + return (EEXIST); 
+ } + + /* Exchange tc and tc_new for proper refcounting & freeing */ + tmp = tc; + tc = tc_new; + tc_new = tmp; + } else { + /* New table */ + if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) { + IPFW_UH_WUNLOCK(ch); + printf("Unable to allocate table index." + " Consider increasing net.inet.ip.fw.tables_max"); + free_table_config(ni, tc); + return (EBUSY); + } + tc->no.kidx = kidx; + + IPFW_WLOCK(ch); + link_table(ch, tc); + IPFW_WUNLOCK(ch); + } + + if (compat != 0) + tc->no.refcnt++; + if (ptc != NULL) + *ptc = tc; + if (pta != NULL) + *pta = ta; + if (pkidx != NULL) + *pkidx = tc->no.kidx; + + IPFW_UH_WUNLOCK(ch); + + if (tc_new != NULL) + free_table_config(ni, tc_new); + + return (0); +} + +static void +ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti) +{ + + memset(ti, 0, sizeof(struct tid_info)); + ti->set = ntlv->set; + ti->uidx = ntlv->idx; + ti->tlvs = ntlv; + ti->tlen = ntlv->head.length; +} + +static void +objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti) +{ + + ntlv_to_ti(&oh->ntlv, ti); +} + +/* + * Exports basic table info as name TLV. + * Used inside dump_static_rules() to provide info + * about all tables referenced by current ruleset. + * + * Returns 0 on success. + */ +int +ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, + struct sockopt_data *sd) +{ + struct namedobj_instance *ni; + struct named_object *no; + ipfw_obj_ntlv *ntlv; + + ni = CHAIN_TO_NI(ch); + + no = ipfw_objhash_lookup_kidx(ni, kidx); + KASSERT(no != NULL, ("invalid table kidx passed")); + + ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); + if (ntlv == NULL) + return (ENOMEM); + + ntlv->head.type = IPFW_TLV_TBL_NAME; + ntlv->head.length = sizeof(*ntlv); + ntlv->idx = no->kidx; + strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); + return (0); } +/* + * Marks every table kidx used in @rule with bit in @bmask. + * Used to generate bitmask of referenced tables for given ruleset. + * + * Returns number of newly-referenced tables. 
+ */ +int +ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule, + uint32_t *bmask) +{ + int cmdlen, l, count; + ipfw_insn *cmd; + uint16_t kidx; + uint8_t type; + + l = rule->cmd_len; + cmd = rule->cmd; + cmdlen = 0; + count = 0; + for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + + if (classify_table_opcode(cmd, &kidx, &type) != 0) + continue; + + if ((bmask[kidx / 32] & (1 << (kidx % 32))) == 0) + count++; + + bmask[kidx / 32] |= 1 << (kidx % 32); + } + + return (count); +} + + +/* + * Exports table @tc info into standard ipfw_xtable_info format. + */ +static void +export_table_info(struct ip_fw_chain *ch, struct table_config *tc, + ipfw_xtable_info *i) +{ + struct table_info *ti; + struct table_algo *ta; + + i->type = tc->no.type; + i->tflags = tc->tflags; + i->vtype = tc->vtype; + i->vftype = tc->vftype; + i->set = tc->no.set; + i->kidx = tc->no.kidx; + i->refcnt = tc->no.refcnt; + i->count = tc->count; + i->limit = tc->limit; + i->flags |= (tc->locked != 0) ? 
IPFW_TGFLAGS_LOCKED : 0; + i->size = tc->count * sizeof(ipfw_obj_tentry); + i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); + strlcpy(i->tablename, tc->tablename, sizeof(i->tablename)); + ti = KIDX_TO_TI(ch, tc->no.kidx); + ta = tc->ta; + if (ta->print_config != NULL) { + /* Use algo function to print table config to string */ + ta->print_config(tc->astate, ti, i->algoname, + sizeof(i->algoname)); + } else + strlcpy(i->algoname, ta->name, sizeof(i->algoname)); + /* Dump algo-specific data, if possible */ + if (ta->dump_tinfo != NULL) { + ta->dump_tinfo(tc->astate, ti, &i->ta_info); + i->ta_info.flags |= IPFW_TATFLAGS_DATA; + } +} + +struct dump_table_args { + struct ip_fw_chain *ch; + struct sockopt_data *sd; +}; + +static void +export_table_internal(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + ipfw_xtable_info *i; + struct dump_table_args *dta; + + dta = (struct dump_table_args *)arg; + + i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i)); + KASSERT(i != 0, ("previously checked buffer is not enough")); + + export_table_info(dta->ch, (struct table_config *)no, i); +} + +/* + * Export all tables as ipfw_xtable_info structures to + * storage provided by @sd. + * + * If supplied buffer is too small, fills in required size + * and returns ENOMEM. + * Returns 0 on success. 
+ */ static int -count_table_entry(struct radix_node *rn, void *arg) +export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, + struct sockopt_data *sd) { - u_int32_t * const cnt = arg; + uint32_t size; + uint32_t count; + struct dump_table_args dta; + + count = ipfw_objhash_count(CHAIN_TO_NI(ch)); + size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader); + + /* Fill in header regadless of buffer size */ + olh->count = count; + olh->objsize = sizeof(ipfw_xtable_info); + + if (size > olh->size) { + olh->size = size; + return (ENOMEM); + } + + olh->size = size; + + dta.ch = ch; + dta.sd = sd; + + ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta); - (*cnt)++; return (0); } +struct dump_args { + struct table_info *ti; + struct table_config *tc; + struct sockopt_data *sd; + uint32_t cnt; + uint16_t uidx; + int error; + ipfw_table_entry *ent; + uint32_t size; + ipfw_obj_tentry tent; +}; + int -ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) +ipfw_dump_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) { - struct radix_node_head *rnh; + int error; + + switch (op3->version) { + case 0: + error = ipfw_dump_table_v0(ch, sd); + break; + case 1: + error = ipfw_dump_table_v1(ch, sd); + break; + default: + error = ENOTSUP; + } + + return (error); +} + +/* + * Dumps all table data + * Data layout (v1)(current): + * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size + * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ] + * + * Returns 0 on success + */ +static int +ipfw_dump_table_v1(struct ip_fw_chain *ch, struct sockopt_data *sd) +{ + struct _ipfw_obj_header *oh; + ipfw_xtable_info *i; + struct tid_info ti; + struct table_config *tc; + struct table_algo *ta; + struct dump_args da; + uint32_t sz; + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); + oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + if (oh == NULL) + return (EINVAL); + + i = 
(ipfw_xtable_info *)(oh + 1); + objheader_to_ti(oh, &ti); + + IPFW_UH_RLOCK(ch); + if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + export_table_info(ch, tc, i); + + if (sd->valsize < i->size) { + + /* + * Submitted buffer size is not enough. + * WE've already filled in @i structure with + * relevant table info including size, so we + * can return. Buffer will be flushed automatically. + */ + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + + /* + * Do the actual dump in eXtended format + */ + memset(&da, 0, sizeof(da)); + da.ti = KIDX_TO_TI(ch, tc->no.kidx); + da.tc = tc; + da.sd = sd; + + ta = tc->ta; - if (tbl >= V_fw_tables_max) + ta->foreach(tc->astate, da.ti, dump_table_tentry, &da); + IPFW_UH_RUNLOCK(ch); + + return (da.error); +} + +/* + * Dumps all table data + * Data layout (version 0)(legacy): + * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE() + * Reply: [ ipfw_xtable ipfw_table_xentry x N ] + * + * Returns 0 on success + */ +static int +ipfw_dump_table_v0(struct ip_fw_chain *ch, struct sockopt_data *sd) +{ + ipfw_xtable *xtbl; + struct tid_info ti; + struct table_config *tc; + struct table_algo *ta; + struct dump_args da; + size_t sz; + + xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable)); + if (xtbl == NULL) return (EINVAL); - *cnt = 0; - if ((rnh = ch->tables[tbl]) == NULL) + + memset(&ti, 0, sizeof(ti)); + ti.uidx = xtbl->tbl; + + IPFW_UH_RLOCK(ch); + if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { + IPFW_UH_RUNLOCK(ch); return (0); - rnh->rnh_walktree(rnh, count_table_entry, cnt); + } + sz = tc->count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable); + + xtbl->cnt = tc->count; + xtbl->size = sz; + xtbl->type = tc->no.type; + xtbl->tbl = ti.uidx; + + if (sd->valsize < sz) { + + /* + * Submitted buffer size is not enough. + * WE've already filled in @i structure with + * relevant table info including size, so we + * can return. Buffer will be flushed automatically. 
+ */ + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + + /* Do the actual dump in eXtended format */ + memset(&da, 0, sizeof(da)); + da.ti = KIDX_TO_TI(ch, tc->no.kidx); + da.tc = tc; + da.sd = sd; + + ta = tc->ta; + + ta->foreach(tc->astate, da.ti, dump_table_xentry, &da); + IPFW_UH_RUNLOCK(ch); + + return (0); +} + +/* + * Legacy IP_FW_TABLE_GETSIZE handler + */ +int +ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) +{ + struct table_config *tc; + + if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) + return (ESRCH); + *cnt = tc->count; + return (0); +} + +/* + * Legacy IP_FW_TABLE_XGETSIZE handler + */ +int +ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) +{ + struct table_config *tc; + + if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { + *cnt = 0; + return (0); /* 'table all list' requires success */ + } + *cnt = tc->count * sizeof(ipfw_table_xentry); + if (tc->count > 0) + *cnt += sizeof(ipfw_xtable); return (0); } static int -dump_table_entry(struct radix_node *rn, void *arg) +dump_table_entry(void *e, void *arg) { - struct table_entry * const n = (struct table_entry *)rn; - ipfw_table * const tbl = arg; + struct dump_args *da; + struct table_config *tc; + struct table_algo *ta; ipfw_table_entry *ent; + int error; + + da = (struct dump_args *)arg; + + tc = da->tc; + ta = tc->ta; - if (tbl->cnt == tbl->size) + /* Out of memory, returning */ + if (da->cnt == da->size) return (1); - ent = &tbl->ent[tbl->cnt]; - ent->tbl = tbl->tbl; - if (in_nullhost(n->mask.sin_addr)) - ent->masklen = 0; - else - ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); - ent->addr = n->addr.sin_addr.s_addr; - ent->value = n->value; - tbl->cnt++; + ent = da->ent++; + ent->tbl = da->uidx; + da->cnt++; + + error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); + if (error != 0) + return (error); + + ent->addr = da->tent.k.addr.s_addr; + ent->masklen = da->tent.masklen; + ent->value = da->tent.value; + return (0); } 
+/* + * Dumps table in pre-8.1 legacy format. + */ int -ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) +ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, + ipfw_table *tbl) { - struct radix_node_head *rnh; + struct table_config *tc; + struct table_algo *ta; + struct dump_args da; - if (tbl->tbl >= V_fw_tables_max) - return (EINVAL); tbl->cnt = 0; - if ((rnh = ch->tables[tbl->tbl]) == NULL) + + if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) + return (0); /* XXX: We should return ESRCH */ + + ta = tc->ta; + + /* This dump format supports IPv4 only */ + if (tc->no.type != IPFW_TABLE_CIDR) return (0); - rnh->rnh_walktree(rnh, dump_table_entry, tbl); + + memset(&da, 0, sizeof(da)); + da.ti = KIDX_TO_TI(ch, tc->no.kidx); + da.tc = tc; + da.ent = &tbl->ent[0]; + da.size = tbl->size; + + tbl->cnt = 0; + ta->foreach(tc->astate, da.ti, dump_table_entry, &da); + tbl->cnt = da.cnt; + return (0); } +/* + * Dumps table entry in eXtended format (v1)(current). + */ static int -count_table_xentry(struct radix_node *rn, void *arg) +dump_table_tentry(void *e, void *arg) { - uint32_t * const cnt = arg; + struct dump_args *da; + struct table_config *tc; + struct table_algo *ta; + ipfw_obj_tentry *tent; + + da = (struct dump_args *)arg; + + tc = da->tc; + ta = tc->ta; + + tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); + /* Out of memory, returning */ + if (tent == NULL) { + da->error = ENOMEM; + return (1); + } + tent->head.length = sizeof(ipfw_obj_tentry); + tent->idx = da->uidx; + + return (ta->dump_tentry(tc->astate, da->ti, e, tent)); +} + +/* + * Dumps table entry in eXtended format (v0). 
+ */ +static int +dump_table_xentry(void *e, void *arg) +{ + struct dump_args *da; + struct table_config *tc; + struct table_algo *ta; + ipfw_table_xentry *xent; + ipfw_obj_tentry *tent; + int error; + + da = (struct dump_args *)arg; + + tc = da->tc; + ta = tc->ta; + + xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); + /* Out of memory, returning */ + if (xent == NULL) + return (1); + xent->len = sizeof(ipfw_table_xentry); + xent->tbl = da->uidx; + + memset(&da->tent, 0, sizeof(da->tent)); + tent = &da->tent; + error = ta->dump_tentry(tc->astate, da->ti, e, tent); + if (error != 0) + return (error); + + /* Convert current format to previous one */ + xent->masklen = tent->masklen; + xent->value = tent->value; + /* Apply some hacks */ + if (tc->no.type == IPFW_TABLE_CIDR && tent->subtype == AF_INET) { + xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; + xent->flags = IPFW_TCF_INET; + } else + memcpy(&xent->k, &tent->k, sizeof(xent->k)); - (*cnt) += sizeof(ipfw_table_xentry); return (0); } +/* + * Table algorithms + */ + +/* + * Finds algoritm by index, table type or supplied name. + * + * Returns pointer to algo or NULL. 
+ */ +static struct table_algo * +find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) +{ + int i, l; + struct table_algo *ta; + + if (ti->type > IPFW_TABLE_MAXTYPE) + return (NULL); + + /* Search by index */ + if (ti->atype != 0) { + if (ti->atype > tcfg->algo_count) + return (NULL); + return (tcfg->algo[ti->atype]); + } + + /* Search by name if supplied */ + if (name != NULL) { + /* TODO: better search */ + for (i = 1; i <= tcfg->algo_count; i++) { + ta = tcfg->algo[i]; + + /* + * One can supply additional algorithm + * parameters so we compare only the first word + * of supplied name: + * 'hash_cidr hsize=32' + * '^^^^^^^^^' + * + */ + l = strlen(ta->name); + if (strncmp(name, ta->name, l) == 0) { + if (name[l] == '\0' || name[l] == ' ') + return (ta); + } + } + + return (NULL); + } + + /* Return default algorithm for given type if set */ + return (tcfg->def_algo[ti->type]); +} + +/* + * Register new table algo @ta. + * Stores algo id inside @idx. + * + * Returns 0 on success. + */ int -ipfw_count_xtable(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) +ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, + int *idx) { - struct radix_node_head *rnh; + struct tables_config *tcfg; + struct table_algo *ta_new; + size_t sz; - if (tbl >= V_fw_tables_max) + if (size > sizeof(struct table_algo)) return (EINVAL); - *cnt = 0; - if ((rnh = ch->tables[tbl]) != NULL) - rnh->rnh_walktree(rnh, count_table_xentry, cnt); - if ((rnh = ch->xtables[tbl]) != NULL) - rnh->rnh_walktree(rnh, count_table_xentry, cnt); - /* Return zero if table is empty */ - if (*cnt > 0) - (*cnt) += sizeof(ipfw_xtable); + + /* Check for the required on-stack size for add/del */ + sz = roundup2(ta->ta_buf_size, sizeof(void *)); + if (sz > TA_BUF_SZ) + return (EINVAL); + + KASSERT(ta->type >= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); + + /* Copy algorithm data to stable storage. 
*/ + ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); + memcpy(ta_new, ta, size); + + tcfg = CHAIN_TO_TCFG(ch); + + KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); + + tcfg->algo[++tcfg->algo_count] = ta_new; + ta_new->idx = tcfg->algo_count; + + /* Set algorithm as default one for given type */ + if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && + tcfg->def_algo[ta_new->type] == NULL) + tcfg->def_algo[ta_new->type] = ta_new; + + *idx = ta_new->idx; + return (0); } +/* + * Unregisters table algo using @idx as id. + * XXX: It is NOT safe to call this function in any place + * other than ipfw instance destroy handler. + */ +void +ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) +{ + struct tables_config *tcfg; + struct table_algo *ta; -static int -dump_table_xentry_base(struct radix_node *rn, void *arg) + tcfg = CHAIN_TO_TCFG(ch); + + KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", + idx, tcfg->algo_count)); + + ta = tcfg->algo[idx]; + KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); + + if (tcfg->def_algo[ta->type] == ta) + tcfg->def_algo[ta->type] = NULL; + + free(ta, M_IPFW); +} + +/* + * Lists all table algorithms currently available. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size + * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ] + * + * Returns 0 on success + */ +int +ipfw_list_table_algo(struct ip_fw_chain *ch, struct sockopt_data *sd) { - struct table_entry * const n = (struct table_entry *)rn; - ipfw_xtable * const tbl = arg; - ipfw_table_xentry *xent; + struct _ipfw_obj_lheader *olh; + struct tables_config *tcfg; + ipfw_ta_info *i; + struct table_algo *ta; + uint32_t count, n, size; + + olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); + if (olh == NULL) + return (EINVAL); + if (sd->valsize < olh->size) + return (EINVAL); + + IPFW_UH_RLOCK(ch); + tcfg = CHAIN_TO_TCFG(ch); + count = tcfg->algo_count; + size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader); + + /* Fill in header regadless of buffer size */ + olh->count = count; + olh->objsize = sizeof(ipfw_ta_info); + + if (size > olh->size) { + olh->size = size; + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + olh->size = size; + + for (n = 1; n <= count; n++) { + i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i)); + KASSERT(i != 0, ("previously checked buffer is not enough")); + ta = tcfg->algo[n]; + strlcpy(i->algoname, ta->name, sizeof(i->algoname)); + i->type = ta->type; + i->refcnt = ta->refcnt; + } + + IPFW_UH_RUNLOCK(ch); - /* Out of memory, returning */ - if (tbl->cnt == tbl->size) - return (1); - xent = &tbl->xent[tbl->cnt]; - xent->len = sizeof(ipfw_table_xentry); - xent->tbl = tbl->tbl; - if (in_nullhost(n->mask.sin_addr)) - xent->masklen = 0; - else - xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); - /* Save IPv4 address as deprecated IPv6 compatible */ - xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr; - xent->flags = IPFW_TCF_INET; - xent->value = n->value; - tbl->cnt++; return (0); } +/* + * Tables rewriting code + */ + +/* + * Determine table number and lookup type for @cmd. + * Fill @tbl and @type with appropriate values. 
+ * Returns 0 for relevant opcodes, 1 otherwise. + */ static int -dump_table_xentry_extended(struct radix_node *rn, void *arg) +classify_table_opcode(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) { - struct table_xentry * const n = (struct table_xentry *)rn; - ipfw_xtable * const tbl = arg; - ipfw_table_xentry *xent; -#ifdef INET6 - int i; - uint32_t *v; -#endif - /* Out of memory, returning */ - if (tbl->cnt == tbl->size) - return (1); - xent = &tbl->xent[tbl->cnt]; - xent->len = sizeof(ipfw_table_xentry); - xent->tbl = tbl->tbl; - - switch (tbl->type) { -#ifdef INET6 - case IPFW_TABLE_CIDR: - /* Count IPv6 mask */ - v = (uint32_t *)&n->m.mask6.sin6_addr; - for (i = 0; i < sizeof(struct in6_addr) / 4; i++, v++) - xent->masklen += bitcount32(*v); - memcpy(&xent->k, &n->a.addr6.sin6_addr, sizeof(struct in6_addr)); + ipfw_insn_if *cmdif; + int skip; + uint16_t v; + + skip = 1; + + switch (cmd->opcode) { + case O_IP_SRC_LOOKUP: + case O_IP_DST_LOOKUP: + /* Basic IPv4/IPv6 or u32 lookups */ + *puidx = cmd->arg1; + /* Assume CIDR by default */ + *ptype = IPFW_TABLE_CIDR; + skip = 0; + + if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) { + /* + * generic lookup. The key must be + * in 32bit big-endian format. 
+ */ + v = ((ipfw_insn_u32 *)cmd)->d[1]; + switch (v) { + case 0: + case 1: + /* IPv4 src/dst */ + break; + case 2: + case 3: + /* src/dst port */ + *ptype = IPFW_TABLE_NUMBER; + break; + case 4: + /* uid/gid */ + *ptype = IPFW_TABLE_NUMBER; + break; + case 5: + /* jid */ + *ptype = IPFW_TABLE_NUMBER; + break; + case 6: + /* dscp */ + *ptype = IPFW_TABLE_NUMBER; + break; + } + } break; -#endif - case IPFW_TABLE_INTERFACE: - /* Assume exact mask */ - xent->masklen = 8 * IF_NAMESIZE; - memcpy(&xent->k, &n->a.iface.ifname, IF_NAMESIZE); + case O_XMIT: + case O_RECV: + case O_VIA: + /* Interface table, possibly */ + cmdif = (ipfw_insn_if *)cmd; + if (cmdif->name[0] != '\1') + break; + + *ptype = IPFW_TABLE_INTERFACE; + *puidx = cmdif->p.kidx; + skip = 0; + break; + case O_IP_FLOW_LOOKUP: + *puidx = cmd->arg1; + *ptype = IPFW_TABLE_FLOW; + skip = 0; break; - - default: - /* unknown, skip entry */ - return (0); } - xent->value = n->value; - tbl->cnt++; - return (0); + return (skip); } +/* + * Sets new table value for given opcode. + * Assume the same opcodes as classify_table_opcode() + */ +static void +update_table_opcode(ipfw_insn *cmd, uint16_t idx) +{ + ipfw_insn_if *cmdif; + + switch (cmd->opcode) { + case O_IP_SRC_LOOKUP: + case O_IP_DST_LOOKUP: + /* Basic IPv4/IPv6 or u32 lookups */ + cmd->arg1 = idx; + break; + case O_XMIT: + case O_RECV: + case O_VIA: + /* Interface table, possibly */ + cmdif = (ipfw_insn_if *)cmd; + cmdif->p.kidx = idx; + break; + case O_IP_FLOW_LOOKUP: + cmd->arg1 = idx; + break; + } +} + +/* + * Checks table name for validity. + * Enforce basic length checks, the rest + * should be done in userland. + * + * Returns 0 if name is considered valid. 
+ */
 int
-ipfw_dump_xtable(struct ip_fw_chain *ch, ipfw_xtable *tbl)
+ipfw_check_table_name(char *name)
 {
-	struct radix_node_head *rnh;
+	int nsize;
+	/* Used for sizeof() only, never dereferenced */
+	ipfw_obj_ntlv *ntlv = NULL;
-	if (tbl->tbl >= V_fw_tables_max)
+	nsize = sizeof(ntlv->name);
+
+	/* Reject names not NUL-terminated within the TLV name field */
+	if (strnlen(name, nsize) == nsize)
 		return (EINVAL);
-	tbl->cnt = 0;
-	tbl->type = ch->tabletype[tbl->tbl];
-	if ((rnh = ch->tables[tbl->tbl]) != NULL)
-		rnh->rnh_walktree(rnh, dump_table_xentry_base, tbl);
-	if ((rnh = ch->xtables[tbl->tbl]) != NULL)
-		rnh->rnh_walktree(rnh, dump_table_xentry_extended, tbl);
+
+	/* Reject empty names */
+	if (name[0] == '\0')
+		return (EINVAL);
+
+	/*
+	 * TODO: do some more complicated checks
+	 */
 	return (0);
 }
-/* end of file */
+/*
+ * Find tablename TLV by @uidx.
+ * Check @tlvs for valid data inside.
+ *
+ * @tlvs points to @len bytes that are walked as a sequence of
+ * fixed-size ipfw_obj_ntlv records.  The data may be unchecked
+ * userland input (see find_table()), so every record is validated
+ * before it is used.
+ *
+ * Returns pointer to found TLV or NULL.  NULL is also returned when
+ * a record is truncated, has unexpected length or carries an invalid
+ * table name.
+ */
+static ipfw_obj_ntlv *
+find_name_tlv(void *tlvs, int len, uint16_t uidx)
+{
+	ipfw_obj_ntlv *ntlv;
+	uintptr_t pa, pe;
+	int l;
+
+	pa = (uintptr_t)tlvs;
+	pe = pa + len;
+	l = 0;
+	for (; pa < pe; pa += l) {
+		/* Never read a record that does not fit into the buffer */
+		if (pe - pa < sizeof(*ntlv))
+			return (NULL);
+
+		ntlv = (ipfw_obj_ntlv *)pa;
+		l = ntlv->head.length;
+
+		/* Only fixed-size records are expected here */
+		if (l != sizeof(*ntlv))
+			return (NULL);
+
+		if (ntlv->head.type != IPFW_TLV_TBL_NAME)
+			continue;
+
+		if (ntlv->idx != uidx)
+			continue;
+
+		if (ipfw_check_table_name(ntlv->name) != 0)
+			return (NULL);
+
+		return (ntlv);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Finds table config based on either legacy index
+ * or name in ntlv.
+ * Note @ti structure contains unchecked data from userland.
+ *
+ * Returns pointer to table_config or NULL.
+ */
+static struct table_config *
+find_table(struct namedobj_instance *ni, struct tid_info *ti)
+{
+	char *name, bname[16];
+	struct named_object *no;
+	ipfw_obj_ntlv *ntlv;
+	uint32_t set;
+
+	if (ti->tlvs != NULL) {
+		ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx);
+		if (ntlv == NULL)
+			return (NULL);
+		name = ntlv->name;
+
+		/*
+		 * Use set provided by @ti instead of @ntlv one.
+		 * This is needed due to different sets behavior
+		 * controlled by V_fw_tables_sets.
+		 */
+		set = ti->set;
+	} else {
+		/* Legacy request: table name is its decimal index, set 0 */
+		snprintf(bname, sizeof(bname), "%d", ti->uidx);
+		name = bname;
+		set = 0;
+	}
+
+	no = ipfw_objhash_lookup_name(ni, set, name);
+
+	return ((struct table_config *)no);
+}
+
+/*
+ * Allocate new table config structure using
+ * specified algo @ta and @aname.
+ *
+ * Table name and set are taken from the name TLV matching ti->uidx
+ * when @ti carries TLVs; otherwise the decimal ti->uidx in set 0 is
+ * used and the config is marked as compat.
+ *
+ * Returns pointer to config or NULL (name TLV not found/invalid or
+ * ta->init() failure).
+ */
+static struct table_config *
+alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct table_algo *ta, char *aname, uint8_t tflags, uint8_t vtype)
+{
+	char *name, bname[16];
+	struct table_config *tc;
+	int error;
+	ipfw_obj_ntlv *ntlv;
+	uint32_t set;
+
+	if (ti->tlvs != NULL) {
+		ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx);
+		if (ntlv == NULL)
+			return (NULL);
+		name = ntlv->name;
+		set = ntlv->set;
+	} else {
+		snprintf(bname, sizeof(bname), "%d", ti->uidx);
+		name = bname;
+		set = 0;
+	}
+
+	tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
+	tc->no.name = tc->tablename;
+	tc->no.type = ti->type;
+	tc->no.set = set;
+	tc->tflags = tflags;
+	tc->ta = ta;
+	strlcpy(tc->tablename, name, sizeof(tc->tablename));
+	tc->vtype = vtype;
+
+	if (ti->tlvs == NULL) {
+		tc->no.compat = 1;
+		tc->no.uidx = ti->uidx;
+	}
+
+	/* Preallocate data structures for new tables */
+	error = ta->init(ch, &tc->astate, &tc->ti, aname, tflags);
+	if (error != 0) {
+		free(tc, M_IPFW);
+		return (NULL);
+	}
+
+	return (tc);
+}
+
+/*
+ * Destroys table state and config.
+ * @tc has to be unlinked already.
+ */
+static void
+free_table_config(struct namedobj_instance *ni, struct table_config *tc)
+{
+
+	KASSERT(tc->linked == 0, ("free() on linked config"));
+
+	/*
+	 * We're using ta without any locking/referencing.
+	 * TODO: fix this if we're going to use unloadable algos.
+	 */
+	tc->ta->destroy(tc->astate, &tc->ti);
+	free(tc, M_IPFW);
+}
+
+/*
+ * Links @tc to @chain table named instance.
+ * Sets appropriate type/states in @chain table info.
+ */
+static void
+link_table(struct ip_fw_chain *ch, struct table_config *tc)
+{
+	struct namedobj_instance *ni;
+	struct table_info *ti;
+	uint16_t kidx;
+
+	/* Both UH and runtime write locks have to be held by the caller */
+	IPFW_UH_WLOCK_ASSERT(ch);
+	IPFW_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_NI(ch);
+	kidx = tc->no.kidx;
+
+	ipfw_objhash_add(ni, &tc->no);
+
+	/* Copy table info kept in @tc into the per-chain slot for @kidx */
+	ti = KIDX_TO_TI(ch, kidx);
+	*ti = tc->ti;
+
+	/* Notify algo on real @ti address */
+	if (tc->ta->change_ti != NULL)
+		tc->ta->change_ti(tc->astate, ti);
+
+	tc->linked = 1;
+	tc->ta->refcnt++;
+}
+
+/*
+ * Unlinks @tc from @chain table named instance.
+ * Zeroes table info slot in @chain; a copy of it is kept in @tc.
+ */
+static void
+unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
+{
+	struct namedobj_instance *ni;
+	struct table_info *ti;
+	uint16_t kidx;
+
+	/* Both UH and runtime write locks have to be held by the caller */
+	IPFW_UH_WLOCK_ASSERT(ch);
+	IPFW_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_NI(ch);
+	kidx = tc->no.kidx;
+
+	/* Clear state. @ti copy is already saved inside @tc */
+	ipfw_objhash_del(ni, &tc->no);
+	ti = KIDX_TO_TI(ch, kidx);
+	memset(ti, 0, sizeof(struct table_info));
+	tc->linked = 0;
+	tc->ta->refcnt--;
+
+	/* Notify algo that the per-chain @ti slot is gone (NULL) */
+	if (tc->ta->change_ti != NULL)
+		tc->ta->change_ti(tc->astate, NULL);
+}
+
+/* Arguments passed to swap_table_set() via ipfw_objhash_foreach() */
+struct swap_table_args {
+	int set;	/* first set */
+	int new_set;	/* second set */
+	int mv;		/* non-zero: tables in @new_set are left alone */
+};
+
+/*
+ * Change set for each matching table.
+ *
+ * Tables in @set are put into @new_set.  Tables in @new_set are put
+ * into @set only if @mv is zero.  Tables in other sets are skipped.
+ *
+ * Ensure we dispatch each table once by setting/checking ochanged
+ * field.
+ */
+static void
+swap_table_set(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct table_config *tc;
+	struct swap_table_args *sta;
+
+	tc = (struct table_config *)no;
+	sta = (struct swap_table_args *)arg;
+
+	/* Skip tables that do not take part in this operation */
+	if (no->set != sta->set && (no->set != sta->new_set || sta->mv != 0))
+		return;
+
+	/* Already handled during this walk */
+	if (tc->ochanged != 0)
+		return;
+
+	/* Remove and re-add the object around the set change */
+	tc->ochanged = 1;
+	ipfw_objhash_del(ni, no);
+	if (no->set == sta->set)
+		no->set = sta->new_set;
+	else
+		no->set = sta->set;
+	ipfw_objhash_add(ni, no);
+}
+
+/*
+ * Cleans up ochanged field for all tables.
+ */
+static void
+clean_table_set_data(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct table_config *tc;
+	struct swap_table_args *sta;
+
+	tc = (struct table_config *)no;
+	/* @sta is not used here */
+	sta = (struct swap_table_args *)arg;
+
+	tc->ochanged = 0;
+}
+
+/*
+ * Swaps tables within two sets.
+ *
+ * Runs swap_table_set() over every table and then resets the
+ * per-table ochanged marker.
+ */
+void
+ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t set,
+    uint32_t new_set, int mv)
+{
+	struct swap_table_args sta;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	sta.set = set;
+	sta.new_set = new_set;
+	sta.mv = mv;
+
+	ipfw_objhash_foreach(CHAIN_TO_NI(ch), swap_table_set, &sta);
+	ipfw_objhash_foreach(CHAIN_TO_NI(ch), clean_table_set_data, &sta);
+}
+
+/*
+ * Move all tables which are referenced by rules in @rt to set @new_set.
+ * Makes sure that all relevant tables are referenced ONLY by given rules.
+ *
+ * Returns 0 on success, 1 if some table is also referenced from
+ * outside of the given rules (no table is moved in this case).
+ */
+int
+ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
+    uint32_t new_set)
+{
+	struct ip_fw *rule;
+	struct table_config *tc;
+	struct named_object *no;
+	struct namedobj_instance *ni;
+	int bad, i, l, cmdlen;
+	uint16_t kidx;
+	uint8_t type;
+	ipfw_insn *cmd;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	ni = CHAIN_TO_NI(ch);
+
+	/*
+	 * Stage 1: count number of references by given rules.
+	 * NOTE(review): last map entry is not visited, presumably
+	 * the default rule -- confirm.
+	 */
+	for (i = 0; i < ch->n_rules - 1; i++) {
+		rule = ch->map[i];
+		if (ipfw_match_range(rule, rt) == 0)
+			continue;
+
+		l = rule->cmd_len;
+		cmd = rule->cmd;
+		cmdlen = 0;
+		for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+			cmdlen = F_LEN(cmd);
+			if (classify_table_opcode(cmd, &kidx, &type) != 0)
+				continue;
+			no = ipfw_objhash_lookup_kidx(ni, kidx);
+			KASSERT(no != NULL,
+			    ("objhash lookup failed on index %d", kidx));
+			tc = (struct table_config *)no;
+			tc->ocount++;
+		}
+
+	}
+
+	/* Stage 2: verify "ownership" */
+	bad = 0;
+	for (i = 0; i < ch->n_rules - 1; i++) {
+		rule = ch->map[i];
+		if (ipfw_match_range(rule, rt) == 0)
+			continue;
+
+		l = rule->cmd_len;
+		cmd = rule->cmd;
+		cmdlen = 0;
+		for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+			cmdlen = F_LEN(cmd);
+			if (classify_table_opcode(cmd, &kidx, &type) != 0)
+				continue;
+			no = ipfw_objhash_lookup_kidx(ni, kidx);
+			KASSERT(no != NULL,
+			    ("objhash lookup failed on index %d", kidx));
+			tc = (struct table_config *)no;
+			if (tc->no.refcnt != tc->ocount) {
+
+				/*
+				 * Number of references differ:
+				 * Other rule(s) are holding reference to given
+				 * table, so it is not possible to change its set.
+				 *
+				 * Note that refcnt may account
+				 * references to some going-to-be-added rules.
+				 * Since we don't know their numbers (and even
+				 * if they will be added) it is perfectly OK
+				 * to return error here.
+				 */
+				bad = 1;
+				break;
+			}
+		}
+
+		if (bad != 0)
+			break;
+	}
+
+	/*
+	 * Stage 3: change set or cleanup.
+	 * Counters from stage 1 are always reset, the set is changed
+	 * only if stage 2 found no conflicts.
+	 */
+	for (i = 0; i < ch->n_rules - 1; i++) {
+		rule = ch->map[i];
+		if (ipfw_match_range(rule, rt) == 0)
+			continue;
+
+		l = rule->cmd_len;
+		cmd = rule->cmd;
+		cmdlen = 0;
+		for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+			cmdlen = F_LEN(cmd);
+			if (classify_table_opcode(cmd, &kidx, &type) != 0)
+				continue;
+			no = ipfw_objhash_lookup_kidx(ni, kidx);
+			KASSERT(no != NULL,
+			    ("objhash lookup failed on index %d", kidx));
+			tc = (struct table_config *)no;
+
+			tc->ocount = 0;
+			if (bad != 0)
+				continue;
+
+			/* Actually change set. */
+			ipfw_objhash_del(ni, no);
+			no->set = new_set;
+			ipfw_objhash_add(ni, no);
+		}
+	}
+
+	return (bad);
+}
+
+/*
+ * Finds and bumps refcount for tables referenced by given @rule.
+ * Auto-creates non-existing tables.
+ * Fills in @oib array with userland/kernel indexes.
+ * First free oidx pointer is saved back in @oib.
+ *
+ * Returns 0 on success.
+ */
+static int
+find_ref_rule_tables(struct ip_fw_chain *ch, struct ip_fw *rule,
+    struct rule_check_info *ci, struct obj_idx **oib, struct tid_info *ti)
+{
+	struct table_config *tc;
+	struct namedobj_instance *ni;
+	struct named_object *no;
+	int cmdlen, error, l, numnew;
+	uint16_t kidx;
+	ipfw_insn *cmd;
+	struct obj_idx *pidx, *pidx_first, *p;
+
+	pidx_first = *oib;
+	pidx = pidx_first;
+	l = rule->cmd_len;
+	cmd = rule->cmd;
+	cmdlen = 0;
+	error = 0;
+	numnew = 0;
+
+	IPFW_UH_WLOCK(ch);
+	ni = CHAIN_TO_NI(ch);
+
+	/* Increase refcount on each existing referenced table. */
+	for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+		cmdlen = F_LEN(cmd);
+
+		if (classify_table_opcode(cmd, &ti->uidx, &ti->type) != 0)
+			continue;
+
+		pidx->uidx = ti->uidx;
+		pidx->type = ti->type;
+		/*
+		 * kidx == 0 means "no reference held" for the loops
+		 * below, so do not rely on the array being pre-zeroed.
+		 */
+		pidx->kidx = 0;
+
+		if ((tc = find_table(ni, ti)) != NULL) {
+			if (tc->no.type != ti->type) {
+				/* Incompatible types */
+				error = EINVAL;
+				break;
+			}
+
+			/* Reference found table and save kidx */
+			tc->no.refcnt++;
+			pidx->kidx = tc->no.kidx;
+			pidx++;
+			continue;
+		}
+
+		/*
+		 * Compatibility stuff for old clients:
+		 * prepare to manually create non-existing tables.
+		 */
+		pidx++;
+		numnew++;
+	}
+
+	if (error != 0) {
+		/* Unref everything we have already done */
+		for (p = pidx_first; p < pidx; p++) {
+			if (p->kidx == 0)
+				continue;
+
+			/* Find & unref by existing idx */
+			no = ipfw_objhash_lookup_kidx(ni, p->kidx);
+			KASSERT(no != NULL, ("Ref'd table %d disappeared",
+			    p->kidx));
+
+			no->refcnt--;
+		}
+	}
+
+	IPFW_UH_WUNLOCK(ch);
+
+	/*
+	 * Stop on failure as well: references taken above are already
+	 * dropped, and auto-creating tables now would overwrite @error
+	 * and could report success for a rule holding no references.
+	 */
+	if (error != 0 || numnew == 0) {
+		*oib = pidx;
+		return (error);
+	}
+
+	/*
+	 * Compatibility stuff: do actual creation for non-existing,
+	 * but referenced tables.
+	 */
+	for (p = pidx_first; p < pidx; p++) {
+		if (p->kidx != 0)
+			continue;
+
+		ti->uidx = p->uidx;
+		ti->type = p->type;
+		ti->atype = 0;
+
+		error = create_table_compat(ch, ti, NULL, NULL, &kidx);
+		if (error == 0) {
+			p->kidx = kidx;
+			continue;
+		}
+
+		/* Error. We have to drop references */
+		IPFW_UH_WLOCK(ch);
+		for (p = pidx_first; p < pidx; p++) {
+			if (p->kidx == 0)
+				continue;
+
+			/* Find & unref by existing idx */
+			no = ipfw_objhash_lookup_kidx(ni, p->kidx);
+			KASSERT(no != NULL, ("Ref'd table %d disappeared",
+			    p->kidx));
+
+			no->refcnt--;
+		}
+		IPFW_UH_WUNLOCK(ch);
+
+		return (error);
+	}
+
+	*oib = pidx;
+
+	return (error);
+}
+
+/*
+ * Remove references from every table used in @rule.
+ * Has to be called with UH write lock held.
+ */
+void
+ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule)
+{
+	int cmdlen, l;
+	ipfw_insn *cmd;
+	struct namedobj_instance *ni;
+	struct named_object *no;
+	uint16_t kidx;
+	uint8_t type;
+
+	IPFW_UH_WLOCK_ASSERT(chain);
+	ni = CHAIN_TO_NI(chain);
+
+	l = rule->cmd_len;
+	cmd = rule->cmd;
+	cmdlen = 0;
+	for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+		cmdlen = F_LEN(cmd);
+
+		/* Skip opcodes which do not reference tables */
+		if (classify_table_opcode(cmd, &kidx, &type) != 0)
+			continue;
+
+		no = ipfw_objhash_lookup_kidx(ni, kidx);
+
+		KASSERT(no != NULL, ("table id %d not found", kidx));
+		KASSERT(no->type == type, ("wrong type %d (%d) for table id %d",
+		    no->type, type, kidx));
+		KASSERT(no->refcnt > 0, ("refcount for table %d is %d",
+		    kidx, no->refcnt));
+
+		no->refcnt--;
+	}
+}
+
+/*
+ * Compatibility function for old ipfw(8) binaries.
+ * Rewrites table kernel indices with userland ones.
+ * Convert tables matching '/^\d+$/' to their atoi() value.
+ * Use number 65535 for other tables.
+ *
+ * Returns 0 on success.
+ */
+int
+ipfw_rewrite_table_kidx(struct ip_fw_chain *chain, struct ip_fw_rule0 *rule)
+{
+	int cmdlen, error, l;
+	ipfw_insn *cmd;
+	uint16_t kidx, uidx;
+	uint8_t type;
+	struct named_object *no;
+	struct namedobj_instance *ni;
+
+	ni = CHAIN_TO_NI(chain);
+	error = 0;
+
+	l = rule->cmd_len;
+	cmd = rule->cmd;
+	cmdlen = 0;
+	for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+		cmdlen = F_LEN(cmd);
+
+		if (classify_table_opcode(cmd, &kidx, &type) != 0)
+			continue;
+
+		/* Unknown kernel index: give up with error 1 */
+		if ((no = ipfw_objhash_lookup_kidx(ni, kidx)) == NULL)
+			return (1);
+
+		uidx = no->uidx;
+		if (no->compat == 0) {
+
+			/*
+			 * We are called via legacy opcode.
+			 * Save error and show table as fake number
+			 * not to make ipfw(8) hang.
+			 */
+			uidx = 65535;
+			error = 2;
+		}
+
+		update_table_opcode(cmd, uidx);
+	}
+
+	/* 0, or 2 if at least one non-compat table was shown as 65535 */
+	return (error);
+}
+
+/*
+ * Checks if opcode is referencing table of appropriate type.
+ * Adds reference count for found table if true.
+ * Rewrites user-supplied opcode values with kernel ones.
+ *
+ * Returns 0 on success and appropriate error code otherwise.
+ */
+int
+ipfw_rewrite_table_uidx(struct ip_fw_chain *chain,
+    struct rule_check_info *ci)
+{
+	int cmdlen, error, l;
+	ipfw_insn *cmd;
+	uint16_t uidx;
+	uint8_t type;
+	struct namedobj_instance *ni;
+	struct obj_idx *p, *pidx_first, *pidx_last;
+	struct tid_info ti;
+
+	ni = CHAIN_TO_NI(chain);
+
+	/*
+	 * Prepare an array for storing opcode indices.
+	 * Use preallocated ci->obuf if it is large enough,
+	 * allocate zeroed array otherwise.
+	 */
+	if (ci->table_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) {
+		/* Stack */
+		pidx_first = ci->obuf;
+	} else
+		pidx_first = malloc(ci->table_opcodes * sizeof(struct obj_idx),
+		    M_IPFW, M_WAITOK | M_ZERO);
+
+	pidx_last = pidx_first;
+	error = 0;
+	type = 0;
+	memset(&ti, 0, sizeof(ti));
+
+	/*
+	 * Use default set for looking up tables (old way) or
+	 * use set rule is assigned to (new way).
+	 */
+	ti.set = (V_fw_tables_sets != 0) ? ci->krule->set : 0;
+	/* Table name TLVs, if any, directly follow the ctlv header */
+	if (ci->ctlv != NULL) {
+		ti.tlvs = (void *)(ci->ctlv + 1);
+		ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv);
+	}
+
+	/* Reference all used tables */
+	error = find_ref_rule_tables(chain, ci->krule, ci, &pidx_last, &ti);
+	if (error != 0)
+		goto free;
+
+	IPFW_UH_WLOCK(chain);
+
+	/*
+	 * Perform rule rewrite.
+	 * Table opcodes are visited in the same order as the index
+	 * array was filled, so @p advances once per table opcode.
+	 */
+	l = ci->krule->cmd_len;
+	cmd = ci->krule->cmd;
+	cmdlen = 0;
+	p = pidx_first;
+	for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
+		cmdlen = F_LEN(cmd);
+		if (classify_table_opcode(cmd, &uidx, &type) != 0)
+			continue;
+		update_table_opcode(cmd, p->kidx);
+		p++;
+	}
+
+	IPFW_UH_WUNLOCK(chain);
+
+free:
+	/* Release the index array only if it was malloc'ed above */
+	if (pidx_first != ci->obuf)
+		free(pidx_first, M_IPFW);
+
+	return (error);
+}
+
diff --git a/sys/netpfil/ipfw/ip_fw_table.h b/sys/netpfil/ipfw/ip_fw_table.h
new file mode 100644
index 0000000..bcbffa8
--- /dev/null
+++ b/sys/netpfil/ipfw/ip_fw_table.h
@@ -0,0 +1,198 @@
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: projects/ipfw/sys/netpfil/ipfw/ip_fw_private.h 267467 2014-06-14 10:58:39Z melifaro $ + */ + +#ifndef _IPFW2_TABLE_H +#define _IPFW2_TABLE_H + +/* + * Internal constants and data structures used by ipfw tables + * not meant to be exported outside the kernel. + */ +#ifdef _KERNEL + +struct table_info { + table_lookup_t *lookup; /* Lookup function */ + void *state; /* Lookup radix/other structure */ + void *xstate; /* eXtended state */ + u_long data; /* Hints for given func */ +}; + +/* Internal structures for handling sockopt data */ +struct tid_info { + uint32_t set; /* table set */ + uint16_t uidx; /* table index */ + uint8_t type; /* table type */ + uint8_t atype; + void *tlvs; /* Pointer to first TLV */ + int tlen; /* Total TLV size block */ +}; + +struct tentry_info { + void *paddr; + uint8_t masklen; /* mask length */ + uint8_t subtype; + uint16_t flags; /* record flags */ + uint32_t value; /* value */ +}; +#define TEI_FLAGS_UPDATE 0x0001 /* Add or update rec if exists */ +#define TEI_FLAGS_UPDATED 0x0002 /* Entry has been updated */ +#define TEI_FLAGS_COMPAT 0x0004 /* Called from old ABI */ +#define TEI_FLAGS_DONTADD 0x0008 /* Do not create new rec */ +#define TEI_FLAGS_ADDED 0x0010 /* Entry was added */ +#define TEI_FLAGS_DELETED 0x0020 /* Entry was deleted */ +#define TEI_FLAGS_LIMIT 0x0040 /* Limit was hit */ +#define TEI_FLAGS_ERROR 0x0080 /* Unknown request error */ +#define TEI_FLAGS_NOTFOUND 0x0100 /* Entry was 
not found */ +#define TEI_FLAGS_EXISTS 0x0200 /* Entry already exists */ + +typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, + struct table_info *ti, char *data, uint8_t tflags); +typedef void (ta_destroy)(void *ta_state, struct table_info *ti); +typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); +typedef int (ta_add)(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +typedef int (ta_del)(void *ta_state, struct table_info *ti, + struct tentry_info *tei, void *ta_buf, uint32_t *pnum); +typedef void (ta_flush_entry)(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf); + +typedef int (ta_need_modify)(void *ta_state, struct table_info *ti, + uint32_t count, uint64_t *pflags); +typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); +typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, + void *ta_buf, uint64_t *pflags); +typedef void (ta_modify)(void *ta_state, struct table_info *ti, + void *ta_buf, uint64_t pflags); +typedef void (ta_flush_mod)(void *ta_buf); + +typedef void (ta_change_ti)(void *ta_state, struct table_info *ti); +typedef void (ta_print_config)(void *ta_state, struct table_info *ti, char *buf, + size_t bufsize); + +typedef int ta_foreach_f(void *node, void *arg); +typedef void ta_foreach(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg); +typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent); +typedef int ta_find_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent); +typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, + ipfw_ta_tinfo *tinfo); + +struct table_algo { + char name[16]; + uint32_t idx; + uint32_t type; + uint32_t refcnt; + uint32_t flags; + size_t ta_buf_size; + ta_init *init; + ta_destroy *destroy; + ta_prepare_add 
*prepare_add; + ta_prepare_del *prepare_del; + ta_add *add; + ta_del *del; + ta_flush_entry *flush_entry; + ta_find_tentry *find_tentry; + ta_need_modify *need_modify; + ta_prepare_mod *prepare_mod; + ta_fill_mod *fill_mod; + ta_modify *modify; + ta_flush_mod *flush_mod; + ta_change_ti *change_ti; + ta_foreach *foreach; + ta_dump_tentry *dump_tentry; + ta_print_config *print_config; + ta_dump_tinfo *dump_tinfo; +}; +#define TA_FLAG_DEFAULT 0x01 /* Algorithm is default for given type */ + +int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, + size_t size, int *idx); +void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx); + +void ipfw_table_algo_init(struct ip_fw_chain *chain); +void ipfw_table_algo_destroy(struct ip_fw_chain *chain); + + +/* direct ipfw_ctl handlers */ +int ipfw_list_tables(struct ip_fw_chain *ch, struct sockopt_data *sd); +int ipfw_dump_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +int ipfw_describe_table(struct ip_fw_chain *ch, struct sockopt_data *sd); + +int ipfw_find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +int ipfw_create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +int ipfw_modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +int ipfw_manage_table_ent(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +int ipfw_flush_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +int ipfw_list_table_algo(struct ip_fw_chain *ch, struct sockopt_data *sd); +int ipfw_swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd); +/* Exported to support legacy opcodes */ +int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint8_t flags, uint32_t count); +int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, + struct tentry_info *tei, uint8_t flags, uint32_t 
count); +int flush_table(struct ip_fw_chain *ch, struct tid_info *ti); + +int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain, + struct rule_check_info *ci); +int ipfw_rewrite_table_kidx(struct ip_fw_chain *chain, + struct ip_fw_rule0 *rule); +int ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule, + uint32_t *bmask); +int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, + struct sockopt_data *sd); +void ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule); + +/* utility functions */ +int ipfw_check_table_name(char *name); +int ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt, + uint32_t new_set); +void ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t old_set, + uint32_t new_set, int mv); + +/* Legacy interfaces */ +int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, + uint32_t *cnt); +int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, + uint32_t *cnt); +int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, + ipfw_table *tbl); + + +#endif /* _KERNEL */ +#endif /* _IPFW2_TABLE_H */ diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c new file mode 100644 index 0000000..a2a5be7 --- /dev/null +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -0,0 +1,3549 @@ +/*- + * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: projects/ipfw/sys/netpfil/ipfw/ip_fw_table.c 267384 2014-06-12 09:59:11Z melifaro $"); + +/* + * Lookup table algorithms. + * + */ + +#include "opt_ipfw.h" +#include "opt_inet.h" +#ifndef INET +#error IPFIREWALL requires INET. +#endif /* INET */ +#include "opt_inet6.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/socket.h> +#include <sys/queue.h> +#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ +#include <net/radix.h> + +#include <netinet/in.h> +#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ +#include <netinet/ip_fw.h> + +#include <netpfil/ipfw/ip_fw_private.h> +#include <netpfil/ipfw/ip_fw_table.h> + + +/* + * IPFW table lookup algorithms. + * + * What is needed to add another table algo? + * + * Algo init: + * * struct table_algo has to be filled with: + * name: "type:algoname" format, e.g. "cidr:radix". Currently + * there are the following types: "cidr", "iface", "number" and "flow". 
+ * type: one of IPFW_TABLE_* types + * flags: one or more TA_FLAG_* + * ta_buf_size: size of structure used to store add/del item state. + * Needs to be less than TA_BUF_SZ. + * callbacks: see below for description. + * * ipfw_add_table_algo / ipfw_del_table_algo have to be called + * + * Callbacks description: + * + * -init: request to initialize new table instance. + * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, + * struct table_info *ti, char *data, uint8_t tflags); + * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success. + * + * Allocate all structures needed for normal operations. + * * Caller may want to parse @data for some algo-specific + * options provided by userland. + * * Caller may want to save configuration state pointer to @ta_state + * * Caller needs to save desired runtime structure pointer(s) + * inside @ti fields. Note that it is not correct to save + * @ti pointer at this moment. Use -change_ti hook for that. + * * Caller has to fill in ti->lookup to appropriate function + * pointer. + * + * + * + * -destroy: request to destroy table instance. + * typedef void (ta_destroy)(void *ta_state, struct table_info *ti); + * MANDATORY, may be locked (UH+WLOCK). (M_NOWAIT). + * + * Frees all table entries and all table structures allocated by -init. + * + * + * + * -prepare_add: request to allocate state for adding new entry. + * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, + * void *ta_buf); + * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success. + * + * Buffer ta_buf of size ta->ta_buf_size may be used to store + * allocated state. + * + * + * + * -prepare_del: request to set state for deleting existing entry. + * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, + * void *ta_buf); + * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success. + * + * Buffer ta_buf of size ta->ta_buf_size may be used to store + * allocated state. 
Caller should use on-stack ta_buf allocation + * instead of doing malloc(). + * + * + * + * -add: request to insert new entry into runtime/config structures. + * typedef int (ta_add)(void *ta_state, struct table_info *ti, + * struct tentry_info *tei, void *ta_buf, uint32_t *pnum); + * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success. + * + * Insert new entry using previously-allocated state in @ta_buf. + * * @tei may have the following flags: + * TEI_FLAGS_UPDATE: request to add or update entry. + * TEI_FLAGS_DONTADD: request to update (but not add) entry. + * * Caller is required to do the following: + * entry added: return 0, set 1 to @pnum + * entry updated: return 0, store 0 to @pnum, store old value in @tei, + * add TEI_FLAGS_UPDATED flag to @tei. + * entry exists: return EEXIST + * entry not found: return ENOENT + * other error: return non-zero error code. + * + * + * + * -del: request to delete existing entry from runtime/config structures. + * typedef int (ta_del)(void *ta_state, struct table_info *ti, + * struct tentry_info *tei, void *ta_buf, uint32_t *pnum); + * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success. + * + * Delete entry using previously set up in @ta_buf. + * * Caller is required to do the following: + * entry deleted: return 0, set 1 to @pnum + * entry not found: return ENOENT + * other error: return non-zero error code. + * + * + * + * -flush_entry: flush entry state created by -prepare_add / -del / others + * typedef void (ta_flush_entry)(struct ip_fw_chain *ch, + * struct tentry_info *tei, void *ta_buf); + * MANDATORY, may be locked. (M_NOWAIT). + * + * Delete state allocated by: + * -prepare_add (-add returned EEXIST|UPDATED) + * -prepare_del (if any) + * -del + * * Caller is required to handle empty @ta_buf correctly. + * + * + * -find_tentry: finds entry specified by key @tei + * typedef int ta_find_tentry(void *ta_state, struct table_info *ti, + * ipfw_obj_tentry *tent); + * OPTIONAL, locked (UH). (M_NOWAIT). 
Returns 0 on success. + * + * Finds entry specified by given key. + * * Caller is required to do the following: + * entry found: returns 0, export entry to @tent + * entry not found: returns ENOENT + * + * + * -need_modify: checks if @ti has enough space to hold another @count items. + * typedef int (ta_need_modify)(void *ta_state, struct table_info *ti, + * uint32_t count, uint64_t *pflags); + * MANDATORY, locked (UH). (M_NOWAIT). Returns 0 if it has. + * + * Checks if given table has enough space to add @count items without + * resize. Caller may use @pflags to store desired modification data. + * + * + * + * -prepare_mod: allocate structures for table modification. + * typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); + * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success. + * + * Allocate all needed state for table modification. Caller + * should use `struct mod_item` to store new state in @ta_buf. + * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf. + * + * + * + * -fill_mod: copy some data to new state. + * typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, + * void *ta_buf, uint64_t *pflags); + * MANDATORY, locked (UH). (M_NOWAIT). Returns 0 on success. + * + * Copy as much data as we can to minimize changes under WLOCK. + * For example, array can be merged inside this callback. + * + * + * + * -modify: perform final modification. + * typedef void (ta_modify)(void *ta_state, struct table_info *ti, + * void *ta_buf, uint64_t pflags); + * MANDATORY, locked (UH+WLOCK). (M_NOWAIT). + * + * Performs all changes necessary to switch to new structures. + * * Caller should save old pointers to @ta_buf storage. + * + * + * + * -flush_mod: flush table modification state. + * typedef void (ta_flush_mod)(void *ta_buf); + * MANDATORY, unlocked. (M_WAITOK). 
+ * + * Performs flush for the following: + * - prepare_mod (modification was not necessary) + * - modify (for the old state) + * + * + * + * -change_ti: monitor table info pointer changes + * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti); + * OPTIONAL, locked (UH). (M_NOWAIT). + * + * Called when @ti pointer changes. Called immediately after -init + * to set initial state. + * + * + * + * -foreach: calls @f for each table entry + * typedef void ta_foreach(void *ta_state, struct table_info *ti, + * ta_foreach_f *f, void *arg); + * MANDATORY, locked(UH). (M_NOWAIT). + * + * Runs callback with specified argument for each table entry. + * Typically used for dumping table entries. + * + * + * + * -dump_tentry: dump table entry in current @tentry format. + * typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, + * ipfw_obj_tentry *tent); + * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success. + * + * Dumps entry @e to @tent. + * + * + * -print_config: prints custom algorithm options into buffer. + * typedef void (ta_print_config)(void *ta_state, struct table_info *ti, + * char *buf, size_t bufsize); + * OPTIONAL. locked(UH). (M_NOWAIT). + * + * Prints custom algorithm options in the format suitable to pass + * back to -init callback. + * + * + * + * -dump_tinfo: dumps algo-specific info. + * typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, + * ipfw_ta_tinfo *tinfo); + * OPTIONAL. locked(UH). (M_NOWAIT). + * + * Dumps options like items size/hash size, etc. 
+ */ + +static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); + +/* + * Utility structures/functions common to more than one algo + */ + +struct mod_item { + void *main_ptr; + size_t size; + void *main_ptr6; + size_t size6; +}; + +static int badd(const void *key, void *item, void *base, size_t nmemb, + size_t size, int (*compar) (const void *, const void *)); +static int bdel(const void *key, void *base, size_t nmemb, size_t size, + int (*compar) (const void *, const void *)); + + +/* + * CIDR implementation using radix + * + */ + +/* + * The radix code expects addr and mask to be array of bytes, + * with the first byte being the length of the array. rn_inithead + * is called with the offset in bits of the lookup key within the + * array. If we use a sockaddr_in as the underlying type, + * sin_len is conveniently located at offset 0, sin_addr is at + * offset 4 and normally aligned. + * But for portability, let's avoid assumption and make the code explicit + */ +#define KEY_LEN(v) *((uint8_t *)&(v)) +/* + * Do not require radix to compare more than actual IPv4/IPv6 address + */ +#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) +#define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr)) + +#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) +#define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr)) + +struct radix_cidr_entry { + struct radix_node rn[2]; + struct sockaddr_in addr; + uint32_t value; + uint8_t masklen; +}; + +struct sa_in6 { + uint8_t sin6_len; + uint8_t sin6_family; + uint8_t pad[2]; + struct in6_addr sin6_addr; +}; + +struct radix_cidr_xentry { + struct radix_node rn[2]; + struct sa_in6 addr6; + uint32_t value; + uint8_t masklen; +}; + +struct radix_cfg { + struct radix_node_head *head4; + struct radix_node_head *head6; + size_t count4; + size_t count6; +}; + +struct ta_buf_cidr +{ + void *ent_ptr; + struct sockaddr *addr_ptr; + struct sockaddr *mask_ptr; + union { + 
struct { + struct sockaddr_in sa; + struct sockaddr_in ma; + } a4; + struct { + struct sa_in6 sa; + struct sa_in6 ma; + } a6; + } addr; +}; + +static int +ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct radix_node_head *rnh; + + if (keylen == sizeof(in_addr_t)) { + struct radix_cidr_entry *ent; + struct sockaddr_in sa; + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = *((in_addr_t *)key); + rnh = (struct radix_node_head *)ti->state; + ent = (struct radix_cidr_entry *)(rnh->rnh_matchaddr(&sa, rnh)); + if (ent != NULL) { + *val = ent->value; + return (1); + } + } else { + struct radix_cidr_xentry *xent; + struct sa_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr)); + rnh = (struct radix_node_head *)ti->xstate; + xent = (struct radix_cidr_xentry *)(rnh->rnh_matchaddr(&sa6, rnh)); + if (xent != NULL) { + *val = xent->value; + return (1); + } + } + + return (0); +} + +/* + * New table + */ +static int +ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + struct radix_cfg *cfg; + + if (!rn_inithead(&ti->state, OFF_LEN_INET)) + return (ENOMEM); + if (!rn_inithead(&ti->xstate, OFF_LEN_INET6)) { + rn_detachhead(&ti->state); + return (ENOMEM); + } + + cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO); + + *ta_state = cfg; + ti->lookup = ta_lookup_radix; + + return (0); +} + +static int +flush_radix_entry(struct radix_node *rn, void *arg) +{ + struct radix_node_head * const rnh = arg; + struct radix_cidr_entry *ent; + + ent = (struct radix_cidr_entry *) + rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); + if (ent != NULL) + free(ent, M_IPFW_TBL); + return (0); +} + +static void +ta_destroy_radix(void *ta_state, struct table_info *ti) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + + cfg = (struct radix_cfg *)ta_state; + + rnh = (struct radix_node_head *)(ti->state); + rnh->rnh_walktree(rnh, 
flush_radix_entry, rnh); + rn_detachhead(&ti->state); + + rnh = (struct radix_node_head *)(ti->xstate); + rnh->rnh_walktree(rnh, flush_radix_entry, rnh); + rn_detachhead(&ti->xstate); + + free(cfg, M_IPFW); +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct radix_cfg *cfg; + + cfg = (struct radix_cfg *)ta_state; + + tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; + tinfo->taclass4 = IPFW_TACLASS_RADIX; + tinfo->count4 = cfg->count4; + tinfo->itemsize4 = sizeof(struct radix_cidr_entry); + tinfo->taclass6 = IPFW_TACLASS_RADIX; + tinfo->count6 = cfg->count6; + tinfo->itemsize6 = sizeof(struct radix_cidr_xentry); +} + +static int +ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct radix_cidr_entry *n; + struct radix_cidr_xentry *xn; + + n = (struct radix_cidr_entry *)e; + + /* Guess IPv4/IPv6 radix by sockaddr family */ + if (n->addr.sin_family == AF_INET) { + tent->k.addr.s_addr = n->addr.sin_addr.s_addr; + tent->masklen = n->masklen; + tent->subtype = AF_INET; + tent->value = n->value; +#ifdef INET6 + } else { + xn = (struct radix_cidr_xentry *)e; + memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr)); + tent->masklen = xn->masklen; + tent->subtype = AF_INET6; + tent->value = xn->value; +#endif + } + + return (0); +} + +static int +ta_find_radix_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct radix_node_head *rnh; + void *e; + + e = NULL; + if (tent->subtype == AF_INET) { + struct sockaddr_in sa; + KEY_LEN(sa) = KEY_LEN_INET; + sa.sin_addr.s_addr = tent->k.addr.s_addr; + rnh = (struct radix_node_head *)ti->state; + e = rnh->rnh_matchaddr(&sa, rnh); + } else { + struct sa_in6 sa6; + KEY_LEN(sa6) = KEY_LEN_INET6; + memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); + rnh = (struct radix_node_head *)ti->xstate; + e = 
rnh->rnh_matchaddr(&sa6, rnh); + } + + if (e != NULL) { + ta_dump_radix_tentry(ta_state, ti, e, tent); + return (0); + } + + return (ENOENT); +} + +static void +ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct radix_node_head *rnh; + + rnh = (struct radix_node_head *)(ti->state); + rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); + + rnh = (struct radix_node_head *)(ti->xstate); + rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg); +} + + +#ifdef INET6 +static inline void +ipv6_writemask(struct in6_addr *addr6, uint8_t mask) +{ + uint32_t *cp; + + for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) + *cp++ = 0xFFFFFFFF; + *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); +} +#endif + +static void +tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, + struct sockaddr *ma, int *set_mask) +{ + int mlen; + struct sockaddr_in *addr, *mask; + struct sa_in6 *addr6, *mask6; + in_addr_t a4; + + mlen = tei->masklen; + + if (tei->subtype == AF_INET) { +#ifdef INET + addr = (struct sockaddr_in *)sa; + mask = (struct sockaddr_in *)ma; + /* Set 'total' structure length */ + KEY_LEN(*addr) = KEY_LEN_INET; + KEY_LEN(*mask) = KEY_LEN_INET; + addr->sin_family = AF_INET; + mask->sin_addr.s_addr = + htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); + a4 = *((in_addr_t *)tei->paddr); + addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr; + if (mlen != 32) + *set_mask = 1; + else + *set_mask = 0; +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + /* IPv6 case */ + addr6 = (struct sa_in6 *)sa; + mask6 = (struct sa_in6 *)ma; + /* Set 'total' structure length */ + KEY_LEN(*addr6) = KEY_LEN_INET6; + KEY_LEN(*mask6) = KEY_LEN_INET6; + addr6->sin6_family = AF_INET6; + ipv6_writemask(&mask6->sin6_addr, mlen); + memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr)); + APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr); + if (mlen != 128) + *set_mask = 1; + else + *set_mask = 0; + } +#endif +} + +static int +ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_cidr *tb; + struct radix_cidr_entry *ent; + struct radix_cidr_xentry *xent; + struct sockaddr *addr, *mask; + int mlen, set_mask; + + tb = (struct ta_buf_cidr *)ta_buf; + + mlen = tei->masklen; + set_mask = 0; + + if (tei->subtype == AF_INET) { +#ifdef INET + if (mlen > 32) + return (EINVAL); + ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); + ent->value = tei->value; + ent->masklen = mlen; + + addr = (struct sockaddr *)&ent->addr; + mask = (struct sockaddr *)&tb->addr.a4.ma; + tb->ent_ptr = ent; +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + /* IPv6 case */ + if (mlen > 128) + return (EINVAL); + xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); + xent->value = tei->value; + xent->masklen = mlen; + + addr = (struct sockaddr *)&xent->addr6; + mask = (struct sockaddr *)&tb->addr.a6.ma; + tb->ent_ptr = xent; +#endif + } else { + /* Unknown CIDR type */ + return (EINVAL); + } + + tei_to_sockaddr_ent(tei, addr, mask, &set_mask); + /* Set pointers */ + tb->addr_ptr = addr; + if (set_mask != 0) + tb->mask_ptr = mask; + + return (0); +} + +static int +ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, + 
void *ta_buf, uint32_t *pnum) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + struct radix_node *rn; + struct ta_buf_cidr *tb; + uint32_t *old_value, value; + + cfg = (struct radix_cfg *)ta_state; + tb = (struct ta_buf_cidr *)ta_buf; + + if (tei->subtype == AF_INET) + rnh = ti->state; + else + rnh = ti->xstate; + + /* Search for an entry first */ + rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, rnh); + if (rn != NULL) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. Update value if we're asked to */ + if (tei->subtype == AF_INET) + old_value = &((struct radix_cidr_entry *)rn)->value; + else + old_value = &((struct radix_cidr_xentry *)rn)->value; + + value = *old_value; + *old_value = tei->value; + tei->value = value; + + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + + return (0); + } + + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + + rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, rnh, tb->ent_ptr); + if (rn == NULL) { + /* Unknown error */ + return (EINVAL); + } + + if (tei->subtype == AF_INET) + cfg->count4++; + else + cfg->count6++; + tb->ent_ptr = NULL; + *pnum = 1; + + return (0); +} + +static int +ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_cidr *tb; + struct sockaddr *addr, *mask; + int mlen, set_mask; + + tb = (struct ta_buf_cidr *)ta_buf; + + mlen = tei->masklen; + set_mask = 0; + + if (tei->subtype == AF_INET) { + if (mlen > 32) + return (EINVAL); + + addr = (struct sockaddr *)&tb->addr.a4.sa; + mask = (struct sockaddr *)&tb->addr.a4.ma; +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + if (mlen > 128) + return (EINVAL); + + addr = (struct sockaddr *)&tb->addr.a6.sa; + mask = (struct sockaddr *)&tb->addr.a6.ma; +#endif + } else + return (EINVAL); + + tei_to_sockaddr_ent(tei, addr, mask, &set_mask); + tb->addr_ptr = addr; + if (set_mask != 0) + 
tb->mask_ptr = mask; + + return (0); +} + +static int +ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct radix_cfg *cfg; + struct radix_node_head *rnh; + struct radix_node *rn; + struct ta_buf_cidr *tb; + + cfg = (struct radix_cfg *)ta_state; + tb = (struct ta_buf_cidr *)ta_buf; + + if (tei->subtype == AF_INET) + rnh = ti->state; + else + rnh = ti->xstate; + + rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, rnh); + + if (rn == NULL) + return (ENOENT); + + /* Save entry value to @tei */ + if (tei->subtype == AF_INET) + tei->value = ((struct radix_cidr_entry *)rn)->value; + else + tei->value = ((struct radix_cidr_xentry *)rn)->value; + + tb->ent_ptr = rn; + + if (tei->subtype == AF_INET) + cfg->count4--; + else + cfg->count6--; + *pnum = 1; + + return (0); +} + +static void +ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_cidr *tb; + + tb = (struct ta_buf_cidr *)ta_buf; + + if (tb->ent_ptr != NULL) + free(tb->ent_ptr, M_IPFW_TBL); +} + +static int +ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + + /* + * radix does not require additional memory allocations + * other than nodes itself. Adding new masks to the tree do + * but we don't have any API to call (and we don't known which + * sizes do we need). 
+ */ + return (0); +} + +struct table_algo cidr_radix = { + .name = "cidr:radix", + .type = IPFW_TABLE_CIDR, + .flags = TA_FLAG_DEFAULT, + .ta_buf_size = sizeof(struct ta_buf_cidr), + .init = ta_init_radix, + .destroy = ta_destroy_radix, + .prepare_add = ta_prepare_add_radix, + .prepare_del = ta_prepare_del_radix, + .add = ta_add_radix, + .del = ta_del_radix, + .flush_entry = ta_flush_radix_entry, + .foreach = ta_foreach_radix, + .dump_tentry = ta_dump_radix_tentry, + .find_tentry = ta_find_radix_tentry, + .dump_tinfo = ta_dump_radix_tinfo, + .need_modify = ta_need_modify_radix, +}; + + +/* + * cidr:hash cmds + * + * + * ti->data: + * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] + * [ 8][ 8[ 8][ 8] + * + * inv.mask4: 32 - mask + * inv.mask6: + * 1) _slow lookup: mask + * 2) _aligned: (128 - mask) / 8 + * 3) _64: 8 + * + * + * pflags: + * [v4=1/v6=0][hsize] + * [ 32][ 32] + */ + +struct chashentry; + +SLIST_HEAD(chashbhead, chashentry); + +struct chash_cfg { + struct chashbhead *head4; + struct chashbhead *head6; + size_t size4; + size_t size6; + size_t items4; + size_t items6; + uint8_t mask4; + uint8_t mask6; +}; + +struct chashentry { + SLIST_ENTRY(chashentry) next; + uint32_t value; + uint32_t type; + union { + uint32_t a4; /* Host format */ + struct in6_addr a6; /* Network format */ + } a; +}; + +struct ta_buf_chash +{ + void *ent_ptr; + struct chashentry ent; +}; + + +static __inline uint32_t +hash_ip(uint32_t addr, int hsize) +{ + + return (addr % (hsize - 1)); +} + +static __inline uint32_t +hash_ip6(struct in6_addr *addr6, int hsize) +{ + uint32_t i; + + i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1] ^ + addr6->s6_addr32[2] ^ addr6->s6_addr32[3]; + + return (i % (hsize - 1)); +} + + +static __inline uint16_t +hash_ip64(struct in6_addr *addr6, int hsize) +{ + uint32_t i; + + i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1]; + + return (i % (hsize - 1)); +} + + +static __inline uint32_t +hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize) +{ + 
struct in6_addr mask6; + + ipv6_writemask(&mask6, mask); + memcpy(addr6, key, sizeof(struct in6_addr)); + APPLY_MASK(addr6, &mask6); + return (hash_ip6(addr6, hsize)); +} + +static __inline uint32_t +hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize) +{ + uint64_t *paddr; + + paddr = (uint64_t *)addr6; + *paddr = 0; + *(paddr + 1) = 0; + memcpy(addr6, key, mask); + return (hash_ip6(addr6, hsize)); +} + +static int +ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct chashbhead *head; + struct chashentry *ent; + uint16_t hash, hsize; + uint8_t imask; + + if (keylen == sizeof(in_addr_t)) { + head = (struct chashbhead *)ti->state; + imask = ti->data >> 24; + hsize = 1 << ((ti->data & 0xFFFF) >> 8); + uint32_t a; + a = ntohl(*((in_addr_t *)key)); + a = a >> imask; + hash = hash_ip(a, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (ent->a.a4 == a) { + *val = ent->value; + return (1); + } + } + } else { + /* IPv6: worst scenario: non-round mask */ + struct in6_addr addr6; + head = (struct chashbhead *)ti->xstate; + imask = (ti->data & 0xFF0000) >> 16; + hsize = 1 << (ti->data & 0xFF); + hash = hash_ip6_slow(&addr6, key, imask, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (memcmp(&ent->a.a6, &addr6, 16) == 0) { + *val = ent->value; + return (1); + } + } + } + + return (0); +} + +static int +ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct chashbhead *head; + struct chashentry *ent; + uint16_t hash, hsize; + uint8_t imask; + + if (keylen == sizeof(in_addr_t)) { + head = (struct chashbhead *)ti->state; + imask = ti->data >> 24; + hsize = 1 << ((ti->data & 0xFFFF) >> 8); + uint32_t a; + a = ntohl(*((in_addr_t *)key)); + a = a >> imask; + hash = hash_ip(a, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (ent->a.a4 == a) { + *val = ent->value; + return (1); + } + } + } else { + /* IPv6: aligned to 8bit mask */ + struct in6_addr 
addr6; + uint64_t *paddr, *ptmp; + head = (struct chashbhead *)ti->xstate; + imask = (ti->data & 0xFF0000) >> 16; + hsize = 1 << (ti->data & 0xFF); + + hash = hash_ip6_al(&addr6, key, imask, hsize); + paddr = (uint64_t *)&addr6; + SLIST_FOREACH(ent, &head[hash], next) { + ptmp = (uint64_t *)&ent->a.a6; + if (paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) { + *val = ent->value; + return (1); + } + } + } + + return (0); +} + +static int +ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct chashbhead *head; + struct chashentry *ent; + uint16_t hash, hsize; + uint8_t imask; + + if (keylen == sizeof(in_addr_t)) { + head = (struct chashbhead *)ti->state; + imask = ti->data >> 24; + hsize = 1 << ((ti->data & 0xFFFF) >> 8); + uint32_t a; + a = ntohl(*((in_addr_t *)key)); + a = a >> imask; + hash = hash_ip(a, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (ent->a.a4 == a) { + *val = ent->value; + return (1); + } + } + } else { + /* IPv6: /64 */ + uint64_t a6, *paddr; + head = (struct chashbhead *)ti->xstate; + paddr = (uint64_t *)key; + hsize = 1 << (ti->data & 0xFF); + a6 = *paddr; + hash = hash_ip64((struct in6_addr *)key, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + paddr = (uint64_t *)&ent->a.a6; + if (a6 == *paddr) { + *val = ent->value; + return (1); + } + } + } + + return (0); +} + +static int +chash_parse_opts(struct chash_cfg *cfg, char *data) +{ + char *pdel, *pend, *s; + int mask4, mask6; + + mask4 = cfg->mask4; + mask6 = cfg->mask6; + + if (data == NULL) + return (0); + if ((pdel = strchr(data, ' ')) == NULL) + return (0); + while (*pdel == ' ') + pdel++; + if (strncmp(pdel, "masks=", 6) != 0) + return (EINVAL); + if ((s = strchr(pdel, ' ')) != NULL) + *s++ = '\0'; + + pdel += 6; + /* Need /XX[,/YY] */ + if (*pdel++ != '/') + return (EINVAL); + mask4 = strtol(pdel, &pend, 10); + if (*pend == ',') { + /* ,/YY */ + pdel = pend + 1; + if (*pdel++ != '/') + return (EINVAL); + mask6 = strtol(pdel, &pend, 10); 
+ if (*pend != '\0') + return (EINVAL); + } else if (*pend != '\0') + return (EINVAL); + + if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128) + return (EINVAL); + + cfg->mask4 = mask4; + cfg->mask6 = mask6; + + return (0); +} + +static void +ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, + size_t bufsize) +{ + struct chash_cfg *cfg; + + cfg = (struct chash_cfg *)ta_state; + + if (cfg->mask4 != 32 || cfg->mask6 != 128) + snprintf(buf, bufsize, "%s masks=/%d,/%d", "cidr:hash", + cfg->mask4, cfg->mask6); + else + snprintf(buf, bufsize, "%s", "cidr:hash"); +} + +static int +log2(uint32_t v) +{ + uint32_t r; + + r = 0; + while (v >>= 1) + r++; + + return (r); +} + +/* + * New table. + * We assume 'data' to be either NULL or the following format: + * 'cidr:hash [masks=/32[,/128]]' + */ +static int +ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + int error, i; + uint32_t hsize; + struct chash_cfg *cfg; + + cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO); + + cfg->mask4 = 32; + cfg->mask6 = 128; + + if ((error = chash_parse_opts(cfg, data)) != 0) { + free(cfg, M_IPFW); + return (error); + } + + cfg->size4 = 128; + cfg->size6 = 128; + + cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW, + M_WAITOK | M_ZERO); + cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < cfg->size4; i++) + SLIST_INIT(&cfg->head4[i]); + for (i = 0; i < cfg->size6; i++) + SLIST_INIT(&cfg->head6[i]); + + + *ta_state = cfg; + ti->state = cfg->head4; + ti->xstate = cfg->head6; + + /* Store data depending on v6 mask length */ + hsize = log2(cfg->size4) << 8 | log2(cfg->size6); + if (cfg->mask6 == 64) { + ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16| + hsize; + ti->lookup = ta_lookup_chash_64; + } else if ((cfg->mask6 % 8) == 0) { + ti->data = (32 - cfg->mask4) << 24 | + cfg->mask6 << 13 | hsize; + 
ti->lookup = ta_lookup_chash_aligned; + } else { + /* don't do that! */ + ti->data = (32 - cfg->mask4) << 24 | + cfg->mask6 << 16 | hsize; + ti->lookup = ta_lookup_chash_slow; + } + + return (0); +} + +static void +ta_destroy_chash(void *ta_state, struct table_info *ti) +{ + struct chash_cfg *cfg; + struct chashentry *ent, *ent_next; + int i; + + cfg = (struct chash_cfg *)ta_state; + + for (i = 0; i < cfg->size4; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) + free(ent, M_IPFW_TBL); + + for (i = 0; i < cfg->size6; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) + free(ent, M_IPFW_TBL); + + free(cfg->head4, M_IPFW); + free(cfg->head6, M_IPFW); + + free(cfg, M_IPFW); +} + +static void +ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct chash_cfg *cfg; + + cfg = (struct chash_cfg *)ta_state; + + tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; + tinfo->taclass4 = IPFW_TACLASS_HASH; + tinfo->size4 = cfg->size4; + tinfo->count4 = cfg->items4; + tinfo->itemsize4 = sizeof(struct chashentry); + tinfo->taclass6 = IPFW_TACLASS_HASH; + tinfo->size6 = cfg->size6; + tinfo->count6 = cfg->items6; + tinfo->itemsize6 = sizeof(struct chashentry); +} + +static int +ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct chash_cfg *cfg; + struct chashentry *ent; + + cfg = (struct chash_cfg *)ta_state; + ent = (struct chashentry *)e; + + if (ent->type == AF_INET) { + tent->k.addr.s_addr = htonl(ent->a.a4 << (32 - cfg->mask4)); + tent->masklen = cfg->mask4; + tent->subtype = AF_INET; + tent->value = ent->value; +#ifdef INET6 + } else { + memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr)); + tent->masklen = cfg->mask6; + tent->subtype = AF_INET6; + tent->value = ent->value; +#endif + } + + return (0); +} + +static uint32_t +hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size) +{ + uint32_t hash; + + if (af == AF_INET) { + hash = 
hash_ip(ent->a.a4, size); + } else { + if (mlen == 64) + hash = hash_ip64(&ent->a.a6, size); + else + hash = hash_ip6(&ent->a.a6, size); + } + + return (hash); +} + +static int +tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent) +{ + struct in6_addr mask6; + int mlen; + + + mlen = tei->masklen; + + if (tei->subtype == AF_INET) { +#ifdef INET + if (mlen > 32) + return (EINVAL); + ent->type = AF_INET; + + /* Calculate masked address */ + ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen); +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + /* IPv6 case */ + if (mlen > 128) + return (EINVAL); + ent->type = AF_INET6; + + ipv6_writemask(&mask6, mlen); + memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr)); + APPLY_MASK(&ent->a.a6, &mask6); +#endif + } else { + /* Unknown CIDR type */ + return (EINVAL); + } + ent->value = tei->value; + + return (0); +} + +static int +ta_find_chash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct chash_cfg *cfg; + struct chashbhead *head; + struct chashentry ent, *tmp; + struct tentry_info tei; + int error; + uint32_t hash; + + cfg = (struct chash_cfg *)ta_state; + + memset(&ent, 0, sizeof(ent)); + memset(&tei, 0, sizeof(tei)); + + if (tent->subtype == AF_INET) { + tei.paddr = &tent->k.addr; + tei.masklen = cfg->mask4; + tei.subtype = AF_INET; + + if ((error = tei_to_chash_ent(&tei, &ent)) != 0) + return (error); + + head = cfg->head4; + hash = hash_ent(&ent, AF_INET, cfg->mask4, cfg->size4); + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (tmp->a.a4 != ent.a.a4) + continue; + + ta_dump_chash_tentry(ta_state, ti, tmp, tent); + return (0); + } + } else { + tei.paddr = &tent->k.addr6; + tei.masklen = cfg->mask6; + tei.subtype = AF_INET6; + + if ((error = tei_to_chash_ent(&tei, &ent)) != 0) + return (error); + + head = cfg->head6; + hash = hash_ent(&ent, AF_INET6, cfg->mask6, cfg->size6); + /* Check for existence */ + SLIST_FOREACH(tmp, 
&head[hash], next) { + if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0) + continue; + ta_dump_chash_tentry(ta_state, ti, tmp, tent); + return (0); + } + } + + return (ENOENT); +} + +static void +ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct chash_cfg *cfg; + struct chashentry *ent, *ent_next; + int i; + + cfg = (struct chash_cfg *)ta_state; + + for (i = 0; i < cfg->size4; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) + f(ent, arg); + + for (i = 0; i < cfg->size6; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) + f(ent, arg); +} + +static int +ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_chash *tb; + struct chashentry *ent; + int error; + + tb = (struct ta_buf_chash *)ta_buf; + + ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); + + error = tei_to_chash_ent(tei, ent); + if (error != 0) { + free(ent, M_IPFW_TBL); + return (error); + } + tb->ent_ptr = ent; + + return (0); +} + +static int +ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct chash_cfg *cfg; + struct chashbhead *head; + struct chashentry *ent, *tmp; + struct ta_buf_chash *tb; + int exists; + uint32_t hash, value; + + cfg = (struct chash_cfg *)ta_state; + tb = (struct ta_buf_chash *)ta_buf; + ent = (struct chashentry *)tb->ent_ptr; + hash = 0; + exists = 0; + + if (tei->subtype == AF_INET) { + if (tei->masklen != cfg->mask4) + return (EINVAL); + head = cfg->head4; + hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (tmp->a.a4 == ent->a.a4) { + exists = 1; + break; + } + } + } else { + if (tei->masklen != cfg->mask6) + return (EINVAL); + head = cfg->head6; + hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (memcmp(&tmp->a.a6, 
&ent->a.a6, 16) == 0) { + exists = 1; + break; + } + } + } + + if (exists == 1) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. Update value if we're asked to */ + value = tmp->value; + tmp->value = tei->value; + tei->value = value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + } else { + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + SLIST_INSERT_HEAD(&head[hash], ent, next); + tb->ent_ptr = NULL; + *pnum = 1; + + /* Update counters */ + if (tei->subtype == AF_INET) + cfg->items4++; + else + cfg->items6++; + } + + return (0); +} + +static int +ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_chash *tb; + + tb = (struct ta_buf_chash *)ta_buf; + + return (tei_to_chash_ent(tei, &tb->ent)); +} + +static int +ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct chash_cfg *cfg; + struct chashbhead *head; + struct chashentry *tmp, *tmp_next, *ent; + struct ta_buf_chash *tb; + uint32_t hash; + + cfg = (struct chash_cfg *)ta_state; + tb = (struct ta_buf_chash *)ta_buf; + ent = &tb->ent; + + if (tei->subtype == AF_INET) { + if (tei->masklen != cfg->mask4) + return (EINVAL); + head = cfg->head4; + hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); + + SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { + if (tmp->a.a4 != ent->a.a4) + continue; + + SLIST_REMOVE(&head[hash], tmp, chashentry, next); + cfg->items4--; + tb->ent_ptr = tmp; + tei->value = tmp->value; + *pnum = 1; + return (0); + } + } else { + if (tei->masklen != cfg->mask6) + return (EINVAL); + head = cfg->head6; + hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); + SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { + if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0) + continue; + + SLIST_REMOVE(&head[hash], tmp, chashentry, next); + cfg->items6--; + 
tb->ent_ptr = tmp; + tei->value = tmp->value; + *pnum = 1; + return (0); + } + } + + return (ENOENT); +} + +static void +ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_chash *tb; + + tb = (struct ta_buf_chash *)ta_buf; + + if (tb->ent_ptr != NULL) + free(tb->ent_ptr, M_IPFW_TBL); +} + +/* + * Hash growing callbacks. + */ + +static int +ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct chash_cfg *cfg; + uint64_t data; + + /* + * Since we don't know exact number of IPv4/IPv6 records in @count, + * ignore non-zero @count value at all. Check current hash sizes + * and return appropriate data. + */ + + cfg = (struct chash_cfg *)ta_state; + + data = 0; + if (cfg->items4 > cfg->size4 && cfg->size4 < 65536) + data |= (cfg->size4 * 2) << 16; + if (cfg->items6 > cfg->size6 && cfg->size6 < 65536) + data |= cfg->size6 * 2; + + if (data != 0) { + *pflags = data; + return (1); + } + + return (0); +} + +/* + * Allocate new, larger chash. + */ +static int +ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + struct chashbhead *head; + int i; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = (*pflags >> 16) & 0xFFFF; + mi->size6 = *pflags & 0xFFFF; + if (mi->size > 0) { + head = malloc(sizeof(struct chashbhead) * mi->size, + M_IPFW, M_WAITOK | M_ZERO); + for (i = 0; i < mi->size; i++) + SLIST_INIT(&head[i]); + mi->main_ptr = head; + } + + if (mi->size6 > 0) { + head = malloc(sizeof(struct chashbhead) * mi->size6, + M_IPFW, M_WAITOK | M_ZERO); + for (i = 0; i < mi->size6; i++) + SLIST_INIT(&head[i]); + mi->main_ptr6 = head; + } + + return (0); +} + +/* + * Copy data from old runtime array to new one. + */ +static int +ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + + /* In is not possible to do rehash if we're not holidng WLOCK. 
*/ + return (0); +} + +/* + * Switch old & new arrays. + */ +static void +ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct chash_cfg *cfg; + struct chashbhead *old_head, *new_head; + struct chashentry *ent, *ent_next; + int af, i, mlen; + uint32_t nhash; + size_t old_size, new_size; + + mi = (struct mod_item *)ta_buf; + cfg = (struct chash_cfg *)ta_state; + + /* Check which hash we need to grow and do we still need that */ + if (mi->size > 0 && cfg->size4 < mi->size) { + new_head = (struct chashbhead *)mi->main_ptr; + new_size = mi->size; + old_size = cfg->size4; + old_head = ti->state; + mlen = cfg->mask4; + af = AF_INET; + + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_ent(ent, af, mlen, new_size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + + ti->state = new_head; + cfg->head4 = new_head; + cfg->size4 = mi->size; + mi->main_ptr = old_head; + } + + if (mi->size6 > 0 && cfg->size6 < mi->size6) { + new_head = (struct chashbhead *)mi->main_ptr6; + new_size = mi->size6; + old_size = cfg->size6; + old_head = ti->xstate; + mlen = cfg->mask6; + af = AF_INET6; + + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_ent(ent, af, mlen, new_size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + + ti->xstate = new_head; + cfg->head6 = new_head; + cfg->size6 = mi->size6; + mi->main_ptr6 = old_head; + } + + /* Update lower 32 bits with new values */ + ti->data &= 0xFFFFFFFF00000000; + ti->data |= log2(cfg->size4) << 8 | log2(cfg->size6); +} + +/* + * Free unneded array. 
+ */ +static void +ta_flush_mod_chash(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); + if (mi->main_ptr6 != NULL) + free(mi->main_ptr6, M_IPFW); +} + +struct table_algo cidr_hash = { + .name = "cidr:hash", + .type = IPFW_TABLE_CIDR, + .ta_buf_size = sizeof(struct ta_buf_chash), + .init = ta_init_chash, + .destroy = ta_destroy_chash, + .prepare_add = ta_prepare_add_chash, + .prepare_del = ta_prepare_del_chash, + .add = ta_add_chash, + .del = ta_del_chash, + .flush_entry = ta_flush_chash_entry, + .foreach = ta_foreach_chash, + .dump_tentry = ta_dump_chash_tentry, + .find_tentry = ta_find_chash_tentry, + .print_config = ta_print_chash_config, + .dump_tinfo = ta_dump_chash_tinfo, + .need_modify = ta_need_modify_chash, + .prepare_mod = ta_prepare_mod_chash, + .fill_mod = ta_fill_mod_chash, + .modify = ta_modify_chash, + .flush_mod = ta_flush_mod_chash, +}; + + +/* + * Iface table cmds. + * + * Implementation: + * + * Runtime part: + * - sorted array of "struct ifidx" pointed by ti->state. + * Array is allocated with rounding up to IFIDX_CHUNK. Only existing + * interfaces are stored in array, however its allocated size is + * sufficient to hold all table records if needed. + * - current array size is stored in ti->data + * + * Table data: + * - "struct iftable_cfg" is allocated to store table state (ta_state). + * - All table records are stored inside namedobj instance. 
+ * + */ + +struct ifidx { + uint16_t kidx; + uint16_t spare; + uint32_t value; +}; + +struct iftable_cfg; + +struct ifentry { + struct named_object no; + struct ipfw_ifc ic; + struct iftable_cfg *icfg; + uint32_t value; + int linked; +}; + +struct iftable_cfg { + struct namedobj_instance *ii; + struct ip_fw_chain *ch; + struct table_info *ti; + void *main_ptr; + size_t size; /* Number of items allocated in array */ + size_t count; /* Number of all items */ + size_t used; /* Number of items _active_ now */ +}; + +struct ta_buf_ifidx +{ + struct ifentry *ife; + uint32_t value; +}; + +int compare_ifidx(const void *k, const void *v); +static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); + +int +compare_ifidx(const void *k, const void *v) +{ + struct ifidx *ifidx; + uint16_t key; + + key = *((uint16_t *)k); + ifidx = (struct ifidx *)v; + + if (key < ifidx->kidx) + return (-1); + else if (key > ifidx->kidx) + return (1); + + return (0); +} + +/* + * Adds item @item with key @key into ascending-sorted array @base. + * Assumes @base has enough additional storage. + * + * Returns 1 on success, 0 on duplicate key. + */ +static int +badd(const void *key, void *item, void *base, size_t nmemb, + size_t size, int (*compar) (const void *, const void *)) +{ + int min, max, mid, shift, res; + caddr_t paddr; + + if (nmemb == 0) { + memcpy(base, item, size); + return (1); + } + + /* Binary search */ + min = 0; + max = nmemb - 1; + mid = 0; + while (min <= max) { + mid = (min + max) / 2; + res = compar(key, (const void *)((caddr_t)base + mid * size)); + if (res == 0) + return (0); + + if (res > 0) + min = mid + 1; + else + max = mid - 1; + } + + /* Item not found. 
*/ + res = compar(key, (const void *)((caddr_t)base + mid * size)); + if (res > 0) + shift = mid + 1; + else + shift = mid; + + paddr = (caddr_t)base + shift * size; + if (nmemb > shift) + memmove(paddr + size, paddr, (nmemb - shift) * size); + + memcpy(paddr, item, size); + + return (1); +} + +/* + * Deletes item with key @key from ascending-sorted array @base. + * + * Returns 1 on success, 0 for non-existent key. + */ +static int +bdel(const void *key, void *base, size_t nmemb, size_t size, + int (*compar) (const void *, const void *)) +{ + caddr_t item; + size_t sz; + + item = (caddr_t)bsearch(key, base, nmemb, size, compar); + + if (item == NULL) + return (0); + + sz = (caddr_t)base + nmemb * size - item; + + if (sz > 0) + memmove(item, item + size, sz); + + return (1); +} + +static struct ifidx * +ifidx_find(struct table_info *ti, void *key) +{ + struct ifidx *ifi; + + ifi = bsearch(key, ti->state, ti->data, sizeof(struct ifidx), + compare_ifidx); + + return (ifi); +} + +static int +ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct ifidx *ifi; + + ifi = ifidx_find(ti, key); + + if (ifi != NULL) { + *val = ifi->value; + return (1); + } + + return (0); +} + +static int +ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + struct iftable_cfg *icfg; + + icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO); + + icfg->ii = ipfw_objhash_create(16); + icfg->size = 16; + icfg->main_ptr = malloc(sizeof(struct ifidx) * icfg->size, M_IPFW, + M_WAITOK | M_ZERO); + icfg->ch = ch; + + *ta_state = icfg; + ti->state = icfg->main_ptr; + ti->lookup = ta_lookup_ifidx; + + return (0); +} + +/* + * Handle tableinfo @ti pointer change (on table array resize). 
+ */ +static void +ta_change_ti_ifidx(void *ta_state, struct table_info *ti) +{ + struct iftable_cfg *icfg; + + icfg = (struct iftable_cfg *)ta_state; + icfg->ti = ti; +} + +static void +destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no, + void *arg) +{ + struct ifentry *ife; + struct ip_fw_chain *ch; + + ch = (struct ip_fw_chain *)arg; + ife = (struct ifentry *)no; + + ipfw_iface_del_notify(ch, &ife->ic); + free(ife, M_IPFW_TBL); +} + + +/* + * Destroys table @ti + */ +static void +ta_destroy_ifidx(void *ta_state, struct table_info *ti) +{ + struct iftable_cfg *icfg; + struct ip_fw_chain *ch; + + icfg = (struct iftable_cfg *)ta_state; + ch = icfg->ch; + + if (icfg->main_ptr != NULL) + free(icfg->main_ptr, M_IPFW); + + ipfw_objhash_foreach(icfg->ii, destroy_ifidx_locked, ch); + + ipfw_objhash_destroy(icfg->ii); + + free(icfg, M_IPFW); +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct iftable_cfg *cfg; + + cfg = (struct iftable_cfg *)ta_state; + + tinfo->taclass4 = IPFW_TACLASS_ARRAY; + tinfo->size4 = cfg->size; + tinfo->count4 = cfg->used; + tinfo->itemsize4 = sizeof(struct ifidx); +} + +/* + * Prepare state to add to the table: + * allocate ifentry and reference needed interface. 
+ */ +static int +ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_ifidx *tb; + char *ifname; + struct ifentry *ife; + + tb = (struct ta_buf_ifidx *)ta_buf; + + /* Check if string is terminated */ + ifname = (char *)tei->paddr; + if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) + return (EINVAL); + + ife = malloc(sizeof(struct ifentry), M_IPFW_TBL, M_WAITOK | M_ZERO); + ife->value = tei->value; + ife->ic.cb = if_notifier; + ife->ic.cbdata = ife; + + if (ipfw_iface_ref(ch, ifname, &ife->ic) != 0) + return (EINVAL); + + /* Use ipfw_iface 'ifname' field as stable storage */ + ife->no.name = ife->ic.iface->ifname; + + tb->ife = ife; + + return (0); +} + +static int +ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct iftable_cfg *icfg; + struct ifentry *ife, *tmp; + struct ta_buf_ifidx *tb; + struct ipfw_iface *iif; + struct ifidx *ifi; + char *ifname; + uint32_t value; + + tb = (struct ta_buf_ifidx *)ta_buf; + ifname = (char *)tei->paddr; + icfg = (struct iftable_cfg *)ta_state; + ife = tb->ife; + + ife->icfg = icfg; + + tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); + + if (tmp != NULL) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + + /* Exchange values in @tmp and @tei */ + value = tmp->value; + tmp->value = tei->value; + tei->value = value; + + iif = tmp->ic.iface; + if (iif->resolved != 0) { + /* We have to update runtime value, too */ + ifi = ifidx_find(ti, &iif->ifindex); + ifi->value = ife->value; + } + + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + return (0); + } + + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + + /* Link to internal list */ + ipfw_objhash_add(icfg->ii, &ife->no); + + /* Link notifier (possible running its callback) */ + ipfw_iface_add_notify(icfg->ch, &ife->ic); + icfg->count++; + + tb->ife = 
NULL; + *pnum = 1; + + return (0); +} + +/* + * Prepare to delete key from table. + * Do basic interface name checks. + */ +static int +ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_ifidx *tb; + char *ifname; + + tb = (struct ta_buf_ifidx *)ta_buf; + + /* Check if string is terminated */ + ifname = (char *)tei->paddr; + if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) + return (EINVAL); + + return (0); +} + +/* + * Remove key from both configuration list and + * runtime array. Removed interface notification. + */ +static int +ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct iftable_cfg *icfg; + struct ifentry *ife; + struct ta_buf_ifidx *tb; + char *ifname; + uint16_t ifindex; + int res; + + tb = (struct ta_buf_ifidx *)ta_buf; + ifname = (char *)tei->paddr; + icfg = (struct iftable_cfg *)ta_state; + ife = tb->ife; + + ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); + + if (ife == NULL) + return (ENOENT); + + if (ife->linked != 0) { + /* We have to remove item from runtime */ + ifindex = ife->ic.iface->ifindex; + + res = bdel(&ifindex, icfg->main_ptr, icfg->used, + sizeof(struct ifidx), compare_ifidx); + + KASSERT(res == 1, ("index %d does not exist", ifindex)); + icfg->used--; + ti->data = icfg->used; + ife->linked = 0; + } + + /* Unlink from local list */ + ipfw_objhash_del(icfg->ii, &ife->no); + /* Unlink notifier */ + ipfw_iface_del_notify(icfg->ch, &ife->ic); + + icfg->count--; + tei->value = ife->value; + + tb->ife = ife; + *pnum = 1; + + return (0); +} + +/* + * Flush deleted entry. + * Drops interface reference and frees entry. 
+ */ +static void +ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_ifidx *tb; + + tb = (struct ta_buf_ifidx *)ta_buf; + + if (tb->ife != NULL) { + /* Unlink first */ + ipfw_iface_unref(ch, &tb->ife->ic); + free(tb->ife, M_IPFW_TBL); + } +} + + +/* + * Handle interface announce/withdrawal for particular table. + * Every real runtime array modification happens here. + */ +static void +if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex) +{ + struct ifentry *ife; + struct ifidx ifi; + struct iftable_cfg *icfg; + struct table_info *ti; + int res; + + ife = (struct ifentry *)cbdata; + icfg = ife->icfg; + ti = icfg->ti; + + KASSERT(ti != NULL, ("ti=NULL, check change_ti handler")); + + if (ife->linked == 0 && ifindex != 0) { + /* Interface announce */ + ifi.kidx = ifindex; + ifi.spare = 0; + ifi.value = ife->value; + res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used, + sizeof(struct ifidx), compare_ifidx); + KASSERT(res == 1, ("index %d already exists", ifindex)); + icfg->used++; + ti->data = icfg->used; + ife->linked = 1; + } else if (ife->linked != 0 && ifindex == 0) { + /* Interface withdrawal */ + ifindex = ife->ic.iface->ifindex; + + res = bdel(&ifindex, icfg->main_ptr, icfg->used, + sizeof(struct ifidx), compare_ifidx); + + KASSERT(res == 1, ("index %d does not exist", ifindex)); + icfg->used--; + ti->data = icfg->used; + ife->linked = 0; + } +} + + +/* + * Table growing callbacks. + */ + +static int +ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct iftable_cfg *cfg; + uint32_t size; + + cfg = (struct iftable_cfg *)ta_state; + + size = cfg->size; + while (size < cfg->count + count) + size *= 2; + + if (size != cfg->size) { + *pflags = size; + return (1); + } + + return (0); +} + +/* + * Allocate ned, larger runtime ifidx array. 
+ */ +static int +ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = *pflags; + mi->main_ptr = malloc(sizeof(struct ifidx) * mi->size, M_IPFW, + M_WAITOK | M_ZERO); + + return (0); +} + +/* + * Copy data from old runtime array to new one. + */ +static int +ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + struct mod_item *mi; + struct iftable_cfg *icfg; + + mi = (struct mod_item *)ta_buf; + icfg = (struct iftable_cfg *)ta_state; + + /* Check if we still need to grow array */ + if (icfg->size >= mi->size) { + *pflags = 0; + return (0); + } + + memcpy(mi->main_ptr, icfg->main_ptr, icfg->used * sizeof(struct ifidx)); + + return (0); +} + +/* + * Switch old & new arrays. + */ +static void +ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct iftable_cfg *icfg; + void *old_ptr; + + mi = (struct mod_item *)ta_buf; + icfg = (struct iftable_cfg *)ta_state; + + old_ptr = icfg->main_ptr; + icfg->main_ptr = mi->main_ptr; + icfg->size = mi->size; + ti->state = icfg->main_ptr; + + mi->main_ptr = old_ptr; +} + +/* + * Free unneded array. 
+ */ +static void +ta_flush_mod_ifidx(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); +} + +static int +ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct ifentry *ife; + + ife = (struct ifentry *)e; + + tent->masklen = 8 * IF_NAMESIZE; + memcpy(&tent->k, ife->no.name, IF_NAMESIZE); + tent->value = ife->value; + + return (0); +} + +static int +ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct iftable_cfg *icfg; + struct ifentry *ife; + char *ifname; + + icfg = (struct iftable_cfg *)ta_state; + ifname = tent->k.iface; + + if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) + return (EINVAL); + + ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); + + if (ife != NULL) { + ta_dump_ifidx_tentry(ta_state, ti, ife, tent); + return (0); + } + + return (ENOENT); +} + +struct wa_ifidx { + ta_foreach_f *f; + void *arg; +}; + +static void +foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, + void *arg) +{ + struct ifentry *ife; + struct wa_ifidx *wa; + + ife = (struct ifentry *)no; + wa = (struct wa_ifidx *)arg; + + wa->f(ife, wa->arg); +} + +static void +ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct iftable_cfg *icfg; + struct wa_ifidx wa; + + icfg = (struct iftable_cfg *)ta_state; + + wa.f = f; + wa.arg = arg; + + ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa); +} + +struct table_algo iface_idx = { + .name = "iface:array", + .type = IPFW_TABLE_INTERFACE, + .flags = TA_FLAG_DEFAULT, + .ta_buf_size = sizeof(struct ta_buf_ifidx), + .init = ta_init_ifidx, + .destroy = ta_destroy_ifidx, + .prepare_add = ta_prepare_add_ifidx, + .prepare_del = ta_prepare_del_ifidx, + .add = ta_add_ifidx, + .del = ta_del_ifidx, + .flush_entry = ta_flush_ifidx_entry, + .foreach = ta_foreach_ifidx, + .dump_tentry = 
ta_dump_ifidx_tentry, + .find_tentry = ta_find_ifidx_tentry, + .dump_tinfo = ta_dump_ifidx_tinfo, + .need_modify = ta_need_modify_ifidx, + .prepare_mod = ta_prepare_mod_ifidx, + .fill_mod = ta_fill_mod_ifidx, + .modify = ta_modify_ifidx, + .flush_mod = ta_flush_mod_ifidx, + .change_ti = ta_change_ti_ifidx, +}; + +/* + * Number array cmds. + * + * Implementation: + * + * Runtime part: + * - sorted array of "struct numarray" pointed by ti->state. + * Array is allocated with rounding up to NUMARRAY_CHUNK. + * - current array size is stored in ti->data + * + */ + +struct numarray { + uint32_t number; + uint32_t value; +}; + +struct numarray_cfg { + void *main_ptr; + size_t size; /* Number of items allocated in array */ + size_t used; /* Number of items _active_ now */ +}; + +struct ta_buf_numarray +{ + struct numarray na; +}; + +int compare_numarray(const void *k, const void *v); + +int +compare_numarray(const void *k, const void *v) +{ + struct numarray *na; + uint32_t key; + + key = *((uint32_t *)k); + na = (struct numarray *)v; + + if (key < na->number) + return (-1); + else if (key > na->number) + return (1); + + return (0); +} + +static struct numarray * +numarray_find(struct table_info *ti, void *key) +{ + struct numarray *ri; + + ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray), + compare_ifidx); + + return (ri); +} + +static int +ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct numarray *ri; + + ri = numarray_find(ti, key); + + if (ri != NULL) { + *val = ri->value; + return (1); + } + + return (0); +} + +static int +ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + struct numarray_cfg *cfg; + + cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO); + + cfg->size = 16; + cfg->main_ptr = malloc(sizeof(struct numarray) * cfg->size, M_IPFW, + M_WAITOK | M_ZERO); + + *ta_state = cfg; + ti->state = cfg->main_ptr; + ti->lookup = 
ta_lookup_numarray; + + return (0); +} + +/* + * Destroys table @ti + */ +static void +ta_destroy_numarray(void *ta_state, struct table_info *ti) +{ + struct numarray_cfg *cfg; + + cfg = (struct numarray_cfg *)ta_state; + + if (cfg->main_ptr != NULL) + free(cfg->main_ptr, M_IPFW); + + free(cfg, M_IPFW); +} + +/* + * Provide algo-specific table info + */ +static void +ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct numarray_cfg *cfg; + + cfg = (struct numarray_cfg *)ta_state; + + tinfo->taclass4 = IPFW_TACLASS_ARRAY; + tinfo->size4 = cfg->size; + tinfo->count4 = cfg->used; + tinfo->itemsize4 = sizeof(struct numarray); +} + +/* + * Prepare for addition/deletion to an array. + */ +static int +ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_numarray *tb; + + tb = (struct ta_buf_numarray *)ta_buf; + + tb->na.number = *((uint32_t *)tei->paddr); + tb->na.value = tei->value; + + return (0); +} + +static int +ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct numarray_cfg *cfg; + struct ta_buf_numarray *tb; + struct numarray *ri; + int res; + uint32_t value; + + tb = (struct ta_buf_numarray *)ta_buf; + cfg = (struct numarray_cfg *)ta_state; + + ri = numarray_find(ti, &tb->na.number); + + if (ri != NULL) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + + /* Exchange values between ri and @tei */ + value = ri->value; + ri->value = tei->value; + tei->value = value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + return (0); + } + + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + + res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used, + sizeof(struct numarray), compare_numarray); + + KASSERT(res == 1, ("number %d already exists", tb->na.number)); + cfg->used++; + ti->data = cfg->used; + *pnum = 1; 
+ + return (0); +} + +/* + * Remove key from both configuration list and + * runtime array. Removed interface notification. + */ +static int +ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct numarray_cfg *cfg; + struct ta_buf_numarray *tb; + struct numarray *ri; + int res; + + tb = (struct ta_buf_numarray *)ta_buf; + cfg = (struct numarray_cfg *)ta_state; + + ri = numarray_find(ti, &tb->na.number); + if (ri == NULL) + return (ENOENT); + + tei->value = ri->value; + + res = bdel(&tb->na.number, cfg->main_ptr, cfg->used, + sizeof(struct numarray), compare_numarray); + + KASSERT(res == 1, ("number %u does not exist", tb->na.number)); + cfg->used--; + ti->data = cfg->used; + *pnum = 1; + + return (0); +} + +static void +ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + + /* We don't have any state, do nothing */ +} + + +/* + * Table growing callbacks. + */ + +static int +ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct numarray_cfg *cfg; + size_t size; + + cfg = (struct numarray_cfg *)ta_state; + + size = cfg->size; + while (size < cfg->used + count) + size *= 2; + + if (size != cfg->size) { + *pflags = size; + return (1); + } + + return (0); +} + +/* + * Allocate new, larger runtime array. + */ +static int +ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = *pflags; + mi->main_ptr = malloc(sizeof(struct numarray) * mi->size, M_IPFW, + M_WAITOK | M_ZERO); + + return (0); +} + +/* + * Copy data from old runtime array to new one. 
+ */ +static int +ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + struct mod_item *mi; + struct numarray_cfg *cfg; + + mi = (struct mod_item *)ta_buf; + cfg = (struct numarray_cfg *)ta_state; + + /* Check if we still need to grow array */ + if (cfg->size >= mi->size) { + *pflags = 0; + return (0); + } + + memcpy(mi->main_ptr, cfg->main_ptr, cfg->used * sizeof(struct numarray)); + + return (0); +} + +/* + * Switch old & new arrays. + */ +static void +ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct numarray_cfg *cfg; + void *old_ptr; + + mi = (struct mod_item *)ta_buf; + cfg = (struct numarray_cfg *)ta_state; + + old_ptr = cfg->main_ptr; + cfg->main_ptr = mi->main_ptr; + cfg->size = mi->size; + ti->state = cfg->main_ptr; + + mi->main_ptr = old_ptr; +} + +/* + * Free unneded array. + */ +static void +ta_flush_mod_numarray(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); +} + +static int +ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct numarray *na; + + na = (struct numarray *)e; + + tent->k.key = na->number; + tent->value = na->value; + + return (0); +} + +static int +ta_find_numarray_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct numarray_cfg *cfg; + struct numarray *ri; + + cfg = (struct numarray_cfg *)ta_state; + + ri = numarray_find(ti, &tent->k.key); + + if (ri != NULL) { + ta_dump_numarray_tentry(ta_state, ti, ri, tent); + return (0); + } + + return (ENOENT); +} + +static void +ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct numarray_cfg *cfg; + struct numarray *array; + int i; + + cfg = (struct numarray_cfg *)ta_state; + array = cfg->main_ptr; + + for (i = 0; i < cfg->used; i++) + f(&array[i], arg); +} + 
+struct table_algo number_array = { + .name = "number:array", + .type = IPFW_TABLE_NUMBER, + .ta_buf_size = sizeof(struct ta_buf_numarray), + .init = ta_init_numarray, + .destroy = ta_destroy_numarray, + .prepare_add = ta_prepare_add_numarray, + .prepare_del = ta_prepare_add_numarray, + .add = ta_add_numarray, + .del = ta_del_numarray, + .flush_entry = ta_flush_numarray_entry, + .foreach = ta_foreach_numarray, + .dump_tentry = ta_dump_numarray_tentry, + .find_tentry = ta_find_numarray_tentry, + .dump_tinfo = ta_dump_numarray_tinfo, + .need_modify = ta_need_modify_numarray, + .prepare_mod = ta_prepare_mod_numarray, + .fill_mod = ta_fill_mod_numarray, + .modify = ta_modify_numarray, + .flush_mod = ta_flush_mod_numarray, +}; + +/* + * flow:hash cmds + * + * + * ti->data: + * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] + * [ 8][ 8[ 8][ 8] + * + * inv.mask4: 32 - mask + * inv.mask6: + * 1) _slow lookup: mask + * 2) _aligned: (128 - mask) / 8 + * 3) _64: 8 + * + * + * pflags: + * [hsize4][hsize6] + * [ 16][ 16] + */ + +struct fhashentry; + +SLIST_HEAD(fhashbhead, fhashentry); + +struct fhashentry { + SLIST_ENTRY(fhashentry) next; + uint8_t af; + uint8_t proto; + uint16_t spare0; + uint16_t dport; + uint16_t sport; + uint32_t value; + uint32_t spare1; +}; + +struct fhashentry4 { + struct fhashentry e; + struct in_addr dip; + struct in_addr sip; +}; + +struct fhashentry6 { + struct fhashentry e; + struct in6_addr dip6; + struct in6_addr sip6; +}; + +struct fhash_cfg { + struct fhashbhead *head; + size_t size; + size_t items; + struct fhashentry4 fe4; + struct fhashentry6 fe6; +}; + +struct ta_buf_fhash { + void *ent_ptr; + struct fhashentry6 fe6; +}; + +static __inline int +cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz) +{ + uint64_t *ka, *kb; + + ka = (uint64_t *)(&a->next + 1); + kb = (uint64_t *)(&b->next + 1); + + if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0)) + return (1); + + return (0); +} + +static __inline uint32_t +hash_flow4(struct 
fhashentry4 *f, int hsize) +{ + uint32_t i; + + i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport); + + return (i % (hsize - 1)); +} + +static __inline uint32_t +hash_flow6(struct fhashentry6 *f, int hsize) +{ + uint32_t i; + + i = (f->dip6.__u6_addr.__u6_addr32[2]) ^ + (f->dip6.__u6_addr.__u6_addr32[3]) ^ + (f->sip6.__u6_addr.__u6_addr32[2]) ^ + (f->sip6.__u6_addr.__u6_addr32[3]) ^ + (f->e.dport) ^ (f->e.sport); + + return (i % (hsize - 1)); +} + +static uint32_t +hash_flow_ent(struct fhashentry *ent, uint32_t size) +{ + uint32_t hash; + + if (ent->af == AF_INET) { + hash = hash_flow4((struct fhashentry4 *)ent, size); + } else { + hash = hash_flow6((struct fhashentry6 *)ent, size); + } + + return (hash); +} + +static int +ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, + uint32_t *val) +{ + struct fhashbhead *head; + struct fhashentry *ent; + struct fhashentry4 *m4; + struct ipfw_flow_id *id; + uint16_t hash, hsize; + + id = (struct ipfw_flow_id *)key; + head = (struct fhashbhead *)ti->state; + hsize = ti->data; + m4 = (struct fhashentry4 *)ti->xstate; + + if (id->addr_type == 4) { + struct fhashentry4 f; + + /* Copy hash mask */ + f = *m4; + + f.dip.s_addr &= id->dst_ip; + f.sip.s_addr &= id->src_ip; + f.e.dport &= id->dst_port; + f.e.sport &= id->src_port; + f.e.proto &= id->proto; + hash = hash_flow4(&f, hsize); + SLIST_FOREACH(ent, &head[hash], next) { + if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) { + *val = ent->value; + return (1); + } + } + } else if (id->addr_type == 6) { + struct fhashentry6 f; + uint64_t *fp, *idp; + + /* Copy hash mask */ + f = *((struct fhashentry6 *)(m4 + 1)); + + /* Handle lack of __u6_addr.__u6_addr64 */ + fp = (uint64_t *)&f.dip6; + idp = (uint64_t *)&id->dst_ip6; + /* src IPv6 is stored after dst IPv6 */ + *fp++ &= *idp++; + *fp++ &= *idp++; + *fp++ &= *idp++; + *fp &= *idp; + f.e.dport &= id->dst_port; + f.e.sport &= id->src_port; + f.e.proto &= id->proto; + hash = hash_flow6(&f, hsize); + 
SLIST_FOREACH(ent, &head[hash], next) { + if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) { + *val = ent->value; + return (1); + } + } + } + + return (0); +} + +/* + * New table. + */ +static int +ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, + char *data, uint8_t tflags) +{ + int i; + struct fhash_cfg *cfg; + struct fhashentry4 *fe4; + struct fhashentry6 *fe6; + + cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO); + + cfg->size = 512; + + cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < cfg->size; i++) + SLIST_INIT(&cfg->head[i]); + + /* Fill in fe masks based on @tflags */ + fe4 = &cfg->fe4; + fe6 = &cfg->fe6; + if (tflags & IPFW_TFFLAG_SRCIP) { + memset(&fe4->sip, 0xFF, sizeof(fe4->sip)); + memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6)); + } + if (tflags & IPFW_TFFLAG_DSTIP) { + memset(&fe4->dip, 0xFF, sizeof(fe4->dip)); + memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6)); + } + if (tflags & IPFW_TFFLAG_SRCPORT) { + memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport)); + memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport)); + } + if (tflags & IPFW_TFFLAG_DSTPORT) { + memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport)); + memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport)); + } + if (tflags & IPFW_TFFLAG_PROTO) { + memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto)); + memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto)); + } + + fe4->e.af = AF_INET; + fe6->e.af = AF_INET6; + + *ta_state = cfg; + ti->state = cfg->head; + ti->xstate = &cfg->fe4; + ti->data = cfg->size; + ti->lookup = ta_lookup_fhash; + + return (0); +} + +static void +ta_destroy_fhash(void *ta_state, struct table_info *ti) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent, *ent_next; + int i; + + cfg = (struct fhash_cfg *)ta_state; + + for (i = 0; i < cfg->size; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) + free(ent, M_IPFW_TBL); + + free(cfg->head, M_IPFW); + free(cfg, M_IPFW); +} + +/* + * Provide 
algo-specific table info + */ +static void +ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) +{ + struct fhash_cfg *cfg; + + cfg = (struct fhash_cfg *)ta_state; + + tinfo->flags = IPFW_TATFLAGS_AFITEM; + tinfo->taclass4 = IPFW_TACLASS_HASH; + tinfo->size4 = cfg->size; + tinfo->count4 = cfg->items; + tinfo->itemsize4 = sizeof(struct fhashentry4); + tinfo->itemsize6 = sizeof(struct fhashentry6); +} + +static int +ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, + ipfw_obj_tentry *tent) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent; + struct fhashentry4 *fe4; + struct fhashentry6 *fe6; + struct tflow_entry *tfe; + + cfg = (struct fhash_cfg *)ta_state; + ent = (struct fhashentry *)e; + tfe = &tent->k.flow; + + tfe->af = ent->af; + tfe->proto = ent->proto; + tfe->dport = htons(ent->dport); + tfe->sport = htons(ent->sport); + tent->value = ent->value; + tent->subtype = ent->af; + + if (ent->af == AF_INET) { + fe4 = (struct fhashentry4 *)ent; + tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr); + tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr); + tent->masklen = 32; +#ifdef INET6 + } else { + fe6 = (struct fhashentry6 *)ent; + tfe->a.a6.sip6 = fe6->sip6; + tfe->a.a6.dip6 = fe6->dip6; + tent->masklen = 128; +#endif + } + + return (0); +} + +static int +tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent) +{ + struct fhashentry4 *fe4; + struct fhashentry6 *fe6; + struct tflow_entry *tfe; + + tfe = (struct tflow_entry *)tei->paddr; + + ent->af = tei->subtype; + ent->proto = tfe->proto; + ent->value = tei->value; + ent->dport = ntohs(tfe->dport); + ent->sport = ntohs(tfe->sport); + + if (tei->subtype == AF_INET) { +#ifdef INET + fe4 = (struct fhashentry4 *)ent; + fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr); + fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr); +#endif +#ifdef INET6 + } else if (tei->subtype == AF_INET6) { + fe6 = (struct fhashentry6 *)ent; + fe6->sip6 = tfe->a.a6.sip6; + fe6->dip6 = 
tfe->a.a6.dip6; +#endif + } else { + /* Unknown CIDR type */ + return (EINVAL); + } + + return (0); +} + + +static int +ta_find_fhash_tentry(void *ta_state, struct table_info *ti, + ipfw_obj_tentry *tent) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct fhashentry6 fe6; + struct tentry_info tei; + int error; + uint32_t hash; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + + ent = &fe6.e; + + memset(&fe6, 0, sizeof(fe6)); + memset(&tei, 0, sizeof(tei)); + + tei.paddr = &tent->k.flow; + tei.subtype = tent->subtype; + + if ((error = tei_to_fhash_ent(&tei, ent)) != 0) + return (error); + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei.subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) != 0) { + ta_dump_fhash_tentry(ta_state, ti, tmp, tent); + return (0); + } + } + + return (ENOENT); +} + +static void +ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, + void *arg) +{ + struct fhash_cfg *cfg; + struct fhashentry *ent, *ent_next; + int i; + + cfg = (struct fhash_cfg *)ta_state; + + for (i = 0; i < cfg->size; i++) + SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) + f(ent, arg); +} + +static int +ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + struct fhashentry *ent; + size_t sz; + int error; + + tb = (struct ta_buf_fhash *)ta_buf; + + if (tei->subtype == AF_INET) + sz = sizeof(struct fhashentry4); + else if (tei->subtype == AF_INET6) + sz = sizeof(struct fhashentry6); + else + return (EINVAL); + + ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO); + + error = tei_to_fhash_ent(tei, ent); + if (error != 0) { + free(ent, M_IPFW_TBL); + return (error); + } + tb->ent_ptr = ent; + + return (0); +} + +static int +ta_add_fhash(void *ta_state, struct 
table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct ta_buf_fhash *tb; + int exists; + uint32_t hash, value; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + tb = (struct ta_buf_fhash *)ta_buf; + ent = (struct fhashentry *)tb->ent_ptr; + exists = 0; + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei->subtype == AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) != 0) { + exists = 1; + break; + } + } + + if (exists == 1) { + if ((tei->flags & TEI_FLAGS_UPDATE) == 0) + return (EEXIST); + /* Record already exists. Update value if we're asked to */ + /* Exchange values between tmp and @tei */ + value = tmp->value; + tmp->value = tei->value; + tei->value = value; + /* Indicate that update has happened instead of addition */ + tei->flags |= TEI_FLAGS_UPDATED; + *pnum = 0; + } else { + if ((tei->flags & TEI_FLAGS_DONTADD) != 0) + return (EFBIG); + + SLIST_INSERT_HEAD(&head[hash], ent, next); + tb->ent_ptr = NULL; + *pnum = 1; + + /* Update counters and check if we need to grow hash */ + cfg->items++; + } + + return (0); +} + +static int +ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + + tb = (struct ta_buf_fhash *)ta_buf; + + return (tei_to_fhash_ent(tei, &tb->fe6.e)); +} + +static int +ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, + void *ta_buf, uint32_t *pnum) +{ + struct fhash_cfg *cfg; + struct fhashbhead *head; + struct fhashentry *ent, *tmp; + struct ta_buf_fhash *tb; + uint32_t hash; + size_t sz; + + cfg = (struct fhash_cfg *)ta_state; + tb = (struct ta_buf_fhash *)ta_buf; + ent = &tb->fe6.e; + + head = cfg->head; + hash = hash_flow_ent(ent, cfg->size); + + if (tei->subtype == 
AF_INET) + sz = 2 * sizeof(struct in_addr); + else + sz = 2 * sizeof(struct in6_addr); + + /* Check for existence */ + SLIST_FOREACH(tmp, &head[hash], next) { + if (cmp_flow_ent(tmp, ent, sz) == 0) + continue; + + SLIST_REMOVE(&head[hash], tmp, fhashentry, next); + tei->value = tmp->value; + *pnum = 1; + cfg->items--; + tb->ent_ptr = tmp; + return (0); + } + + return (ENOENT); +} + +static void +ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, + void *ta_buf) +{ + struct ta_buf_fhash *tb; + + tb = (struct ta_buf_fhash *)ta_buf; + + if (tb->ent_ptr != NULL) + free(tb->ent_ptr, M_IPFW_TBL); +} + +/* + * Hash growing callbacks. + */ + +static int +ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count, + uint64_t *pflags) +{ + struct fhash_cfg *cfg; + + cfg = (struct fhash_cfg *)ta_state; + + if (cfg->items > cfg->size && cfg->size < 65536) { + *pflags = cfg->size * 2; + return (1); + } + + return (0); +} + +/* + * Allocate new, larger fhash. + */ +static int +ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags) +{ + struct mod_item *mi; + struct fhashbhead *head; + int i; + + mi = (struct mod_item *)ta_buf; + + memset(mi, 0, sizeof(struct mod_item)); + mi->size = *pflags; + head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW, + M_WAITOK | M_ZERO); + for (i = 0; i < mi->size; i++) + SLIST_INIT(&head[i]); + + mi->main_ptr = head; + + return (0); +} + +/* + * Copy data from old runtime array to new one. + */ +static int +ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t *pflags) +{ + + /* In is not possible to do rehash if we're not holidng WLOCK. */ + return (0); +} + +/* + * Switch old & new arrays. 
+ */ +static void +ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, + uint64_t pflags) +{ + struct mod_item *mi; + struct fhash_cfg *cfg; + struct fhashbhead *old_head, *new_head; + struct fhashentry *ent, *ent_next; + int i; + uint32_t nhash; + size_t old_size; + + mi = (struct mod_item *)ta_buf; + cfg = (struct fhash_cfg *)ta_state; + + old_size = cfg->size; + old_head = ti->state; + /* Rehash every entry of the old array into the new one; the _SAFE iterator is used because each entry is relinked while walking the list */ + new_head = (struct fhashbhead *)mi->main_ptr; + for (i = 0; i < old_size; i++) { + SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { + nhash = hash_flow_ent(ent, mi->size); + SLIST_INSERT_HEAD(&new_head[nhash], ent, next); + } + } + /* Publish the new array and size in both the table info and the algorithm state */ + ti->state = new_head; + ti->data = mi->size; + cfg->head = new_head; + cfg->size = mi->size; + + mi->main_ptr = old_head; /* old array is left in the buffer, where ta_flush_mod_fhash() would free it */ +} + +/* + * Free unneeded array. + */ +static void +ta_flush_mod_fhash(void *ta_buf) +{ + struct mod_item *mi; + + mi = (struct mod_item *)ta_buf; + if (mi->main_ptr != NULL) + free(mi->main_ptr, M_IPFW); +} +/* + * Callback table for the "flow:hash" algorithm (IPFW_TABLE_FLOW tables). + */ +struct table_algo flow_hash = { + .name = "flow:hash", + .type = IPFW_TABLE_FLOW, + .flags = TA_FLAG_DEFAULT, + .ta_buf_size = sizeof(struct ta_buf_fhash), + .init = ta_init_fhash, + .destroy = ta_destroy_fhash, + .prepare_add = ta_prepare_add_fhash, + .prepare_del = ta_prepare_del_fhash, + .add = ta_add_fhash, + .del = ta_del_fhash, + .flush_entry = ta_flush_fhash_entry, + .foreach = ta_foreach_fhash, + .dump_tentry = ta_dump_fhash_tentry, + .find_tentry = ta_find_fhash_tentry, + .dump_tinfo = ta_dump_fhash_tinfo, + .need_modify = ta_need_modify_fhash, + .prepare_mod = ta_prepare_mod_fhash, + .fill_mod = ta_fill_mod_fhash, + .modify = ta_modify_fhash, + .flush_mod = ta_flush_mod_fhash, +}; + +void +ipfw_table_algo_init(struct ip_fw_chain *ch) +{ + size_t sz; + + /* + * Register all algorithms presented here. 
+ */ + sz = sizeof(struct table_algo); + ipfw_add_table_algo(ch, &cidr_radix, sz, &cidr_radix.idx); + ipfw_add_table_algo(ch, &cidr_hash, sz, &cidr_hash.idx); + ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx); + ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx); + ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx); +} +/* + * Calls ipfw_del_table_algo() for each algorithm handled by + * ipfw_table_algo_init(), using the idx field whose address was passed to + * ipfw_add_table_algo() there. + */ +void +ipfw_table_algo_destroy(struct ip_fw_chain *ch) +{ + + ipfw_del_table_algo(ch, cidr_radix.idx); + ipfw_del_table_algo(ch, cidr_hash.idx); + ipfw_del_table_algo(ch, iface_idx.idx); + ipfw_del_table_algo(ch, number_array.idx); + ipfw_del_table_algo(ch, flow_hash.idx); +} + + |