diff options
43 files changed, 8750 insertions, 2655 deletions
diff --git a/contrib/pf/authpf/authpf.8 b/contrib/pf/authpf/authpf.8 index 5d63e83..ee0dcaa 100644 --- a/contrib/pf/authpf/authpf.8 +++ b/contrib/pf/authpf/authpf.8 @@ -1,28 +1,18 @@ -.\" $OpenBSD: authpf.8,v 1.38 2005/01/04 09:57:04 jmc Exp $ +.\" $OpenBSD: authpf.8,v 1.43 2007/02/24 17:21:04 beck Exp $ .\" -.\" Copyright (c) 2002 Bob Beck (beck@openbsd.org>. All rights reserved. +.\" Copyright (c) 1998-2007 Bob Beck (beck@openbsd.org>. All rights reserved. .\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. The name of the author may not be used to endorse or promote products -.\" derived from this software without specific prior written permission. +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. .\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" .Dd January 10, 2002 .Dt AUTHPF 8 @@ -225,8 +215,11 @@ it becomes unresponsive, or if arp or address spoofing is used to hijack the session. Note that TCP keepalives are not sufficient for this, since they are not secure. -Also note that +Also note that the various SSH tunnelling mechanisms, +such as .Ar AllowTcpForwarding +and +.Ar PermitTunnel , should be disabled for .Nm users to prevent them from circumventing restrictions imposed by the @@ -424,8 +417,7 @@ TCP connections. 
external_if = "xl0" internal_if = "fxp0" -pass in log quick on $internal_if proto tcp from $user_ip to any \e - keep state +pass in log quick on $internal_if proto tcp from $user_ip to any pass in quick on $internal_if from $user_ip to any .Ed .Pp @@ -440,16 +432,15 @@ ipsec_gw="10.2.3.4" # rdr ftp for proxying by ftp-proxy(8) rdr on $internal_if proto tcp from $user_ip to any port 21 \e - -> 127.0.0.1 port 8081 + -> 127.0.0.1 port 8021 # allow out ftp, ssh, www and https only, and allow user to negotiate # ipsec with the ipsec server. pass in log quick on $internal_if proto tcp from $user_ip to any \e - port { 21, 22, 80, 443 } flags S/SA + port { 21, 22, 80, 443 } pass in quick on $internal_if proto tcp from $user_ip to any \e port { 21, 22, 80, 443 } -pass in quick proto udp from $user_ip to $ipsec_gw port = isakmp \e - keep state +pass in quick proto udp from $user_ip to $ipsec_gw port = isakmp pass in quick proto esp from $user_ip to $ipsec_gw .Ed .Pp @@ -464,7 +455,7 @@ int_if = "fxp0" # nat and tag connections... 
nat on $ext_if from $user_ip to any tag $user_ip -> $ext_addr pass in quick on $int_if from $user_ip to any -pass out log quick on $ext_if tagged $user_ip keep state +pass out log quick on $ext_if tagged $user_ip .Ed .Pp With the above rules added by @@ -490,7 +481,7 @@ lines will give SMTP and IMAP access to logged in users: .Bd -literal table <authpf_users> persist pass in on $ext_if proto tcp from <authpf_users> \e - to port { smtp imap } keep state + to port { smtp imap } .Ed .Pp It is also possible to use the "authpf_users" @@ -516,6 +507,7 @@ rdr-anchor "authpf/*" from <authpf_users> .Sh SEE ALSO .Xr pf 4 , .Xr pf.conf 5 , +.Xr securelevel 7 , .Xr ftp-proxy 8 .Sh HISTORY The diff --git a/contrib/pf/authpf/authpf.c b/contrib/pf/authpf/authpf.c index 1ae6aa4..68adcd2 100644 --- a/contrib/pf/authpf/authpf.c +++ b/contrib/pf/authpf/authpf.c @@ -1,28 +1,19 @@ -/* $OpenBSD: authpf.c,v 1.89 2005/02/10 04:24:15 joel Exp $ */ +/* $OpenBSD: authpf.c,v 1.104 2007/02/24 17:35:08 beck Exp $ */ /* - * Copyright (C) 1998 - 2002 Bob Beck (beck@openbsd.org). + * Copyright (C) 1998 - 2007 Bob Beck (beck@openbsd.org). * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
* - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include <sys/types.h> @@ -50,15 +41,13 @@ #include "pathnames.h" -extern int symset(const char *, const char *, int); - static int read_config(FILE *); static void print_message(char *); static int allowed_luser(char *); static int check_luser(char *, char *); static int remove_stale_rulesets(void); static int change_filter(int, const char *, const char *); -static int change_table(int, const char *, const char *); +static int change_table(int, const char *); static void authpf_kill_states(void); int dev; /* pf device */ @@ -67,7 +56,6 @@ char rulesetname[MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 2]; char tablename[PF_TABLE_NAME_SIZE] = "authpf_users"; FILE *pidfp; -char *infile; /* file name printed by yyerror() in parse.y */ char luser[MAXLOGNAME]; /* username */ char ipsrc[256]; /* ip as a string */ char pidfile[MAXPATHLEN]; /* we save pid in this file. */ @@ -92,11 +80,16 @@ main(int argc, char *argv[]) struct in6_addr ina; struct passwd *pw; char *cp; + gid_t gid; uid_t uid; char *shell; login_cap_t *lc; config = fopen(PATH_CONFFILE, "r"); + if (config == NULL) { + syslog(LOG_ERR, "can not open %s (%m)", PATH_CONFFILE); + exit(1); + } if ((cp = getenv("SSH_TTY")) == NULL) { syslog(LOG_ERR, "non-interactive session connection for authpf"); @@ -133,7 +126,6 @@ main(int argc, char *argv[]) uid = getuid(); pw = getpwuid(uid); - endpwent(); if (pw == NULL) { syslog(LOG_ERR, "cannot find user for uid %u", uid); goto die; @@ -246,6 +238,8 @@ main(int argc, char *argv[]) if (++lockcnt > 10) { syslog(LOG_ERR, "cannot kill previous authpf (pid %d)", otherpid); + fclose(pidfp); + pidfp = NULL; goto dogdeath; } sleep(1); @@ -255,12 +249,22 @@ main(int argc, char *argv[]) * it's lock, giving us a chance to get it now */ fclose(pidfp); + pidfp = NULL; } while (1); + + /* whack the group list */ + gid = getegid(); + if (setgroups(1, &gid) == -1) { + syslog(LOG_INFO, "setgroups: %s", strerror(errno)); + do_death(0); + } /* revoke privs */ - seteuid(getuid()); - setuid(getuid()); 
- + uid = getuid(); + if (setresuid(uid, uid, uid) == -1) { + syslog(LOG_INFO, "setresuid: %s", strerror(errno)); + do_death(0); + } openlog("authpf", LOG_PID | LOG_NDELAY, LOG_DAEMON); if (!check_luser(PATH_BAN_DIR, luser) || !allowed_luser(luser)) { @@ -268,8 +272,8 @@ main(int argc, char *argv[]) do_death(0); } - if (config == NULL || read_config(config)) { - syslog(LOG_INFO, "bad or nonexistent %s", PATH_CONFFILE); + if (read_config(config)) { + syslog(LOG_ERR, "invalid config file %s", PATH_CONFFILE); do_death(0); } @@ -288,7 +292,7 @@ main(int argc, char *argv[]) printf("Unable to modify filters\r\n"); do_death(0); } - if (change_table(1, luser, ipsrc) == -1) { + if (change_table(1, ipsrc) == -1) { printf("Unable to modify table\r\n"); change_filter(0, luser, ipsrc); do_death(0); @@ -299,7 +303,7 @@ main(int argc, char *argv[]) signal(SIGALRM, need_death); signal(SIGPIPE, need_death); signal(SIGHUP, need_death); - signal(SIGSTOP, need_death); + signal(SIGQUIT, need_death); signal(SIGTSTP, need_death); while (1) { printf("\r\nHello %s. 
", luser); @@ -547,9 +551,11 @@ check_luser(char *luserdir, char *luser) while (fputs(tmp, stdout) != EOF && !feof(f)) { if (fgets(tmp, sizeof(tmp), f) == NULL) { fflush(stdout); + fclose(f); return (0); } } + fclose(f); } fflush(stdout); return (0); @@ -633,6 +639,7 @@ change_filter(int add, const char *luser, const char *ipsrc) char *fdpath = NULL, *userstr = NULL, *ipstr = NULL; char *rsn = NULL, *fn = NULL; pid_t pid; + gid_t gid; int s; if (luser == NULL || !luser[0] || ipsrc == NULL || !ipsrc[0]) { @@ -672,8 +679,14 @@ change_filter(int add, const char *luser, const char *ipsrc) switch (pid = fork()) { case -1: - err(1, "fork failed"); + syslog(LOG_ERR, "fork failed"); + goto error; case 0: + /* revoke group privs before exec */ + gid = getgid(); + if (setregid(gid, gid) == -1) { + err(1, "setregid"); + } execvp(PATH_PFCTL, pargv); warn("exec of %s failed", PATH_PFCTL); _exit(1); @@ -682,10 +695,8 @@ change_filter(int add, const char *luser, const char *ipsrc) /* parent */ waitpid(pid, &s, 0); if (s != 0) { - if (WIFEXITED(s)) { - syslog(LOG_ERR, "pfctl exited abnormally"); - goto error; - } + syslog(LOG_ERR, "pfctl exited abnormally"); + goto error; } if (add) { @@ -701,16 +712,10 @@ no_mem: syslog(LOG_ERR, "malloc failed"); error: free(fdpath); - fdpath = NULL; free(rsn); - rsn = NULL; free(userstr); - userstr = NULL; free(ipstr); - ipstr = NULL; free(fn); - fn = NULL; - infile = NULL; return (-1); } @@ -718,13 +723,14 @@ error: * Add/remove this IP from the "authpf_users" table. 
*/ static int -change_table(int add, const char *luser, const char *ipsrc) +change_table(int add, const char *ipsrc) { struct pfioc_table io; struct pfr_addr addr; bzero(&io, sizeof(io)); - strlcpy(io.pfrio_table.pfrt_name, tablename, sizeof(io.pfrio_table)); + strlcpy(io.pfrio_table.pfrt_name, tablename, + sizeof(io.pfrio_table.pfrt_name)); io.pfrio_buffer = &addr; io.pfrio_esize = sizeof(addr); io.pfrio_size = 1; @@ -813,13 +819,11 @@ do_death(int active) if (active) { change_filter(0, luser, ipsrc); - change_table(0, luser, ipsrc); + change_table(0, ipsrc); authpf_kill_states(); remove_stale_rulesets(); } - if (pidfp) - ftruncate(fileno(pidfp), 0); - if (pidfile[0]) + if (pidfile[0] && (pidfp != NULL)) if (unlink(pidfile) == -1) syslog(LOG_ERR, "cannot unlink %s (%m)", pidfile); exit(ret); diff --git a/contrib/pf/ftp-proxy/filter.c b/contrib/pf/ftp-proxy/filter.c new file mode 100644 index 0000000..f86429d --- /dev/null +++ b/contrib/pf/ftp-proxy/filter.c @@ -0,0 +1,387 @@ +/* $OpenBSD: filter.c,v 1.5 2006/12/01 07:31:21 camield Exp $ */ + +/* + * Copyright (c) 2004, 2005 Camiel Dobbelaar, <cd@sentia.nl> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <net/if.h> +#include <net/pfvar.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "filter.h" + +/* From netinet/in.h, but only _KERNEL_ gets them. */ +#define satosin(sa) ((struct sockaddr_in *)(sa)) +#define satosin6(sa) ((struct sockaddr_in6 *)(sa)) + +enum { TRANS_FILTER = 0, TRANS_NAT, TRANS_RDR, TRANS_SIZE }; + +int prepare_rule(u_int32_t, int, struct sockaddr *, struct sockaddr *, + u_int16_t); +int server_lookup4(struct sockaddr_in *, struct sockaddr_in *, + struct sockaddr_in *); +int server_lookup6(struct sockaddr_in6 *, struct sockaddr_in6 *, + struct sockaddr_in6 *); + +static struct pfioc_pooladdr pfp; +static struct pfioc_rule pfr; +static struct pfioc_trans pft; +static struct pfioc_trans_e pfte[TRANS_SIZE]; +static int dev, rule_log; +static char *qname; + +int +add_filter(u_int32_t id, u_int8_t dir, struct sockaddr *src, + struct sockaddr *dst, u_int16_t d_port) +{ + if (!src || !dst || !d_port) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_FILTER, src, dst, d_port) == -1) + return (-1); + + pfr.rule.direction = dir; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + return (-1); + + return (0); +} + +int +add_nat(u_int32_t id, struct sockaddr *src, struct sockaddr *dst, + u_int16_t d_port, struct sockaddr *nat, u_int16_t nat_range_low, + u_int16_t nat_range_high) +{ + if (!src || !dst || !d_port || !nat || !nat_range_low || + (src->sa_family != nat->sa_family)) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_NAT, src, dst, d_port) == -1) + return (-1); + + if (nat->sa_family == AF_INET) { + memcpy(&pfp.addr.addr.v.a.addr.v4, + &satosin(nat)->sin_addr.s_addr, 4); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 4); + } else { + 
memcpy(&pfp.addr.addr.v.a.addr.v6, + &satosin6(nat)->sin6_addr.s6_addr, 16); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 16); + } + if (ioctl(dev, DIOCADDADDR, &pfp) == -1) + return (-1); + + pfr.rule.rpool.proxy_port[0] = nat_range_low; + pfr.rule.rpool.proxy_port[1] = nat_range_high; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + return (-1); + + return (0); +} + +int +add_rdr(u_int32_t id, struct sockaddr *src, struct sockaddr *dst, + u_int16_t d_port, struct sockaddr *rdr, u_int16_t rdr_port) +{ + if (!src || !dst || !d_port || !rdr || !rdr_port || + (src->sa_family != rdr->sa_family)) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_RDR, src, dst, d_port) == -1) + return (-1); + + if (rdr->sa_family == AF_INET) { + memcpy(&pfp.addr.addr.v.a.addr.v4, + &satosin(rdr)->sin_addr.s_addr, 4); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 4); + } else { + memcpy(&pfp.addr.addr.v.a.addr.v6, + &satosin6(rdr)->sin6_addr.s6_addr, 16); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 16); + } + if (ioctl(dev, DIOCADDADDR, &pfp) == -1) + return (-1); + + pfr.rule.rpool.proxy_port[0] = rdr_port; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + return (-1); + + return (0); +} + +int +do_commit(void) +{ + if (ioctl(dev, DIOCXCOMMIT, &pft) == -1) + return (-1); + + return (0); +} + +int +do_rollback(void) +{ + if (ioctl(dev, DIOCXROLLBACK, &pft) == -1) + return (-1); + + return (0); +} + +void +init_filter(char *opt_qname, int opt_verbose) +{ + struct pf_status status; + + qname = opt_qname; + + if (opt_verbose == 1) + rule_log = PF_LOG; + else if (opt_verbose == 2) + rule_log = PF_LOG_ALL; + + dev = open("/dev/pf", O_RDWR); + if (dev == -1) + err(1, "/dev/pf"); + if (ioctl(dev, DIOCGETSTATUS, &status) == -1) + err(1, "DIOCGETSTATUS"); + if (!status.running) + errx(1, "pf is disabled"); +} + +int +prepare_commit(u_int32_t id) +{ + char an[PF_ANCHOR_NAME_SIZE]; + int i; + + memset(&pft, 0, sizeof pft); + pft.size = TRANS_SIZE; + pft.esize = sizeof pfte[0]; + 
pft.array = pfte; + + snprintf(an, PF_ANCHOR_NAME_SIZE, "%s/%d.%d", FTP_PROXY_ANCHOR, + getpid(), id); + for (i = 0; i < TRANS_SIZE; i++) { + memset(&pfte[i], 0, sizeof pfte[0]); + strlcpy(pfte[i].anchor, an, PF_ANCHOR_NAME_SIZE); + switch (i) { + case TRANS_FILTER: + pfte[i].rs_num = PF_RULESET_FILTER; + break; + case TRANS_NAT: + pfte[i].rs_num = PF_RULESET_NAT; + break; + case TRANS_RDR: + pfte[i].rs_num = PF_RULESET_RDR; + break; + default: + errno = EINVAL; + return (-1); + } + } + + if (ioctl(dev, DIOCXBEGIN, &pft) == -1) + return (-1); + + return (0); +} + +int +prepare_rule(u_int32_t id, int rs_num, struct sockaddr *src, + struct sockaddr *dst, u_int16_t d_port) +{ + char an[PF_ANCHOR_NAME_SIZE]; + + if ((src->sa_family != AF_INET && src->sa_family != AF_INET6) || + (src->sa_family != dst->sa_family)) { + errno = EPROTONOSUPPORT; + return (-1); + } + + memset(&pfp, 0, sizeof pfp); + memset(&pfr, 0, sizeof pfr); + snprintf(an, PF_ANCHOR_NAME_SIZE, "%s/%d.%d", FTP_PROXY_ANCHOR, + getpid(), id); + strlcpy(pfp.anchor, an, PF_ANCHOR_NAME_SIZE); + strlcpy(pfr.anchor, an, PF_ANCHOR_NAME_SIZE); + + switch (rs_num) { + case PF_RULESET_FILTER: + pfr.ticket = pfte[TRANS_FILTER].ticket; + break; + case PF_RULESET_NAT: + pfr.ticket = pfte[TRANS_NAT].ticket; + break; + case PF_RULESET_RDR: + pfr.ticket = pfte[TRANS_RDR].ticket; + break; + default: + errno = EINVAL; + return (-1); + } + if (ioctl(dev, DIOCBEGINADDRS, &pfp) == -1) + return (-1); + pfr.pool_ticket = pfp.ticket; + + /* Generic for all rule types. 
*/ + pfr.rule.af = src->sa_family; + pfr.rule.proto = IPPROTO_TCP; + pfr.rule.src.addr.type = PF_ADDR_ADDRMASK; + pfr.rule.dst.addr.type = PF_ADDR_ADDRMASK; + if (src->sa_family == AF_INET) { + memcpy(&pfr.rule.src.addr.v.a.addr.v4, + &satosin(src)->sin_addr.s_addr, 4); + memset(&pfr.rule.src.addr.v.a.mask.addr8, 255, 4); + memcpy(&pfr.rule.dst.addr.v.a.addr.v4, + &satosin(dst)->sin_addr.s_addr, 4); + memset(&pfr.rule.dst.addr.v.a.mask.addr8, 255, 4); + } else { + memcpy(&pfr.rule.src.addr.v.a.addr.v6, + &satosin6(src)->sin6_addr.s6_addr, 16); + memset(&pfr.rule.src.addr.v.a.mask.addr8, 255, 16); + memcpy(&pfr.rule.dst.addr.v.a.addr.v6, + &satosin6(dst)->sin6_addr.s6_addr, 16); + memset(&pfr.rule.dst.addr.v.a.mask.addr8, 255, 16); + } + pfr.rule.dst.port_op = PF_OP_EQ; + pfr.rule.dst.port[0] = htons(d_port); + + switch (rs_num) { + case PF_RULESET_FILTER: + /* + * pass quick [log] inet[6] proto tcp \ + * from $src to $dst port = $d_port flags S/SA keep state + * (max 1) [queue qname] + */ + pfr.rule.action = PF_PASS; + pfr.rule.quick = 1; + pfr.rule.log = rule_log; + pfr.rule.keep_state = 1; + pfr.rule.flags = TH_SYN; + pfr.rule.flagset = (TH_SYN|TH_ACK); + pfr.rule.max_states = 1; + if (qname != NULL) + strlcpy(pfr.rule.qname, qname, sizeof pfr.rule.qname); + break; + case PF_RULESET_NAT: + /* + * nat inet[6] proto tcp from $src to $dst port $d_port -> $nat + */ + pfr.rule.action = PF_NAT; + break; + case PF_RULESET_RDR: + /* + * rdr inet[6] proto tcp from $src to $dst port $d_port -> $rdr + */ + pfr.rule.action = PF_RDR; + break; + default: + errno = EINVAL; + return (-1); + } + + return (0); +} + +int +server_lookup(struct sockaddr *client, struct sockaddr *proxy, + struct sockaddr *server) +{ + if (client->sa_family == AF_INET) + return (server_lookup4(satosin(client), satosin(proxy), + satosin(server))); + + if (client->sa_family == AF_INET6) + return (server_lookup6(satosin6(client), satosin6(proxy), + satosin6(server))); + + errno = EPROTONOSUPPORT; + return 
(-1); +} + +int +server_lookup4(struct sockaddr_in *client, struct sockaddr_in *proxy, + struct sockaddr_in *server) +{ + struct pfioc_natlook pnl; + + memset(&pnl, 0, sizeof pnl); + pnl.direction = PF_OUT; + pnl.af = AF_INET; + pnl.proto = IPPROTO_TCP; + memcpy(&pnl.saddr.v4, &client->sin_addr.s_addr, sizeof pnl.saddr.v4); + memcpy(&pnl.daddr.v4, &proxy->sin_addr.s_addr, sizeof pnl.daddr.v4); + pnl.sport = client->sin_port; + pnl.dport = proxy->sin_port; + + if (ioctl(dev, DIOCNATLOOK, &pnl) == -1) + return (-1); + + memset(server, 0, sizeof(struct sockaddr_in)); + server->sin_len = sizeof(struct sockaddr_in); + server->sin_family = AF_INET; + memcpy(&server->sin_addr.s_addr, &pnl.rdaddr.v4, + sizeof server->sin_addr.s_addr); + server->sin_port = pnl.rdport; + + return (0); +} + +int +server_lookup6(struct sockaddr_in6 *client, struct sockaddr_in6 *proxy, + struct sockaddr_in6 *server) +{ + struct pfioc_natlook pnl; + + memset(&pnl, 0, sizeof pnl); + pnl.direction = PF_OUT; + pnl.af = AF_INET6; + pnl.proto = IPPROTO_TCP; + memcpy(&pnl.saddr.v6, &client->sin6_addr.s6_addr, sizeof pnl.saddr.v6); + memcpy(&pnl.daddr.v6, &proxy->sin6_addr.s6_addr, sizeof pnl.daddr.v6); + pnl.sport = client->sin6_port; + pnl.dport = proxy->sin6_port; + + if (ioctl(dev, DIOCNATLOOK, &pnl) == -1) + return (-1); + + memset(server, 0, sizeof(struct sockaddr_in6)); + server->sin6_len = sizeof(struct sockaddr_in6); + server->sin6_family = AF_INET6; + memcpy(&server->sin6_addr.s6_addr, &pnl.rdaddr.v6, + sizeof server->sin6_addr); + server->sin6_port = pnl.rdport; + + return (0); +} diff --git a/contrib/pf/ftp-proxy/filter.h b/contrib/pf/ftp-proxy/filter.h new file mode 100644 index 0000000..6779c59 --- /dev/null +++ b/contrib/pf/ftp-proxy/filter.h @@ -0,0 +1,31 @@ +/* $OpenBSD: filter.h,v 1.3 2005/06/07 14:12:07 camield Exp $ */ + +/* + * Copyright (c) 2004, 2005 Camiel Dobbelaar, <cd@sentia.nl> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or 
without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#define FTP_PROXY_ANCHOR "ftp-proxy" + +int add_filter(u_int32_t, u_int8_t, struct sockaddr *, struct sockaddr *, + u_int16_t); +int add_nat(u_int32_t, struct sockaddr *, struct sockaddr *, u_int16_t, + struct sockaddr *, u_int16_t, u_int16_t); +int add_rdr(u_int32_t, struct sockaddr *, struct sockaddr *, u_int16_t, + struct sockaddr *, u_int16_t); +int do_commit(void); +int do_rollback(void); +void init_filter(char *, int); +int prepare_commit(u_int32_t); +int server_lookup(struct sockaddr *, struct sockaddr *, struct sockaddr *); diff --git a/contrib/pf/ftp-proxy/ftp-proxy.8 b/contrib/pf/ftp-proxy/ftp-proxy.8 index e128136..44e6e59 100644 --- a/contrib/pf/ftp-proxy/ftp-proxy.8 +++ b/contrib/pf/ftp-proxy/ftp-proxy.8 @@ -1,293 +1,183 @@ -.\" $OpenBSD: ftp-proxy.8,v 1.42 2004/11/19 00:47:23 jmc Exp $ +.\" $OpenBSD: ftp-proxy.8,v 1.7 2006/12/30 13:01:54 camield Exp $ .\" -.\" Copyright (c) 1996-2001 -.\" Obtuse Systems Corporation, All rights reserved. +.\" Copyright (c) 2004, 2005 Camiel Dobbelaar, <cd@sentia.nl> .\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. 
Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. .\" -.\" THIS SOFTWARE IS PROVIDED BY OBTUSE SYSTEMS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL OBTUSE OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
.\" -.Dd August 17, 2001 +.Dd November 28, 2004 .Dt FTP-PROXY 8 .Os .Sh NAME .Nm ftp-proxy -.Nd Internet File Transfer Protocol proxy server +.Nd Internet File Transfer Protocol proxy daemon .Sh SYNOPSIS .Nm ftp-proxy -.Bk -words -.Op Fl AnrVw +.Op Fl 6Adrv .Op Fl a Ar address -.Op Fl D Ar debuglevel -.Op Fl g Ar group -.Op Fl M Ar maxport -.Op Fl m Ar minport -.Op Fl R Ar address[:port] -.Op Fl S Ar address +.Op Fl b Ar address +.Op Fl D Ar level +.Op Fl m Ar maxsessions +.Op Fl P Ar port +.Op Fl p Ar port +.Op Fl q Ar queue +.Op Fl R Ar address .Op Fl t Ar timeout -.Op Fl u Ar user -.Ek .Sh DESCRIPTION .Nm is a proxy for the Internet File Transfer Protocol. -The proxy uses +FTP control connections should be redirected into the proxy using the .Xr pf 4 -and expects to have the FTP control connection as described in -.Xr services 5 -redirected to it via a +.Ar rdr +command, after which the proxy connects to the server on behalf of +the client. +.Pp +The proxy allows data connections to pass, rewriting and redirecting +them so that the right addresses are used. +All connections from the client to the server have their source +address rewritten so they appear to come from the proxy. +Consequently, all connections from the server to the proxy have +their destination address rewritten, so they are redirected to the +client. +The proxy uses the .Xr pf 4 -.Em rdr -command. -An example of how to do that is further down in this document. +.Ar anchor +facility for this. +.Pp +Assuming the FTP control connection is from $client to $server, the +proxy connected to the server using the $proxy source address, and +$port is negotiated, then +.Nm ftp-proxy +adds the following rules to the various anchors. +(These example rules use inet, but the proxy also supports inet6.) 
+.Pp +In case of active mode (PORT or EPRT): +.Bd -literal -offset 2n +rdr from $server to $proxy port $port -> $client +pass quick inet proto tcp \e + from $server to $client port $port +.Ed +.Pp +In case of passive mode (PASV or EPSV): +.Bd -literal -offset 2n +nat from $client to $server port $port -> $proxy +pass in quick inet proto tcp \e + from $client to $server port $port +pass out quick inet proto tcp \e + from $proxy to $server port $port +.Ed .Pp The options are as follows: .Bl -tag -width Ds +.It Fl 6 +IPv6 mode. +The proxy will expect and use IPv6 addresses for all communication. +Only the extended FTP modes EPSV and EPRT are allowed with IPv6. +The proxy is in IPv4 mode by default. .It Fl A -Permit only anonymous FTP connections. -The proxy will allow connections to log in to other sites as the user -.Qq ftp -or -.Qq anonymous -only. -Any attempt to log in as another user will be blocked by the proxy. +Only permit anonymous FTP connections. +Either user "ftp" or user "anonymous" is allowed. .It Fl a Ar address -Specify the local IP address to use in -.Xr bind 2 -as the source for connections made by -.Nm ftp-proxy -when connecting to destination FTP servers. -This may be necessary if the interface address of -your default route is not reachable from the destinations -.Nm -is attempting connections to, or this address is different from the one -connections are being NATed to. -In the usual case this means that -.Ar address -should be a publicly visible IP address assigned to one of -the interfaces on the machine running -.Nm -and should be the same address to which you are translating traffic -if you are using the -.Fl n -option. -.It Fl D Ar debuglevel -Specify a debug level, where the proxy emits verbose debug output -into -.Xr syslogd 8 -at level -.Dv LOG_DEBUG . -Meaningful values of debuglevel are 0-3, where 0 is no debug output and -3 is lots of debug output, the default being 0. 
-.It Fl g Ar group -Specify the named group to drop group privileges to, after doing -.Xr pf 4 -lookups which require root. -By default, -.Nm -uses the default group of the user it drops privilege to. -.It Fl M Ar maxport -Specify the upper end of the port range the proxy will use for the -data connections it establishes. -The default is -.Dv IPPORT_HILASTAUTO -defined in -.Aq Pa netinet/in.h -as 65535. -.It Fl m Ar minport -Specify the lower end of the port range the proxy will use for all -data connections it establishes. -The default is -.Dv IPPORT_HIFIRSTAUTO -defined in -.Aq Pa netinet/in.h -as 49152. -.It Fl n -Activate network address translation -.Pq NAT -mode. -In this mode, the proxy will not attempt to proxy passive mode -.Pq PASV or EPSV -data connections. -In order for this to work, the machine running the proxy will need to -be forwarding packets and doing network address translation to allow -the outbound passive connections from the client to reach the server. -See -.Xr pf.conf 5 -for more details on NAT. -The proxy only ignores passive mode data connections when using this flag; -it will still proxy PORT and EPRT mode data connections. -Without this flag, -.Nm -does not require any IP forwarding or NAT beyond the -.Em rdr -necessary to capture the FTP control connection. -.It Fl R Ar address:[port] -Reverse proxy mode for FTP servers running behind a NAT gateway. -In this mode, no redirection is needed. -The proxy is run from -.Xr inetd 8 -on the port that external clients connect to (usually 21). -Control connections and passive data connections are forwarded -to the server. +The proxy will use this as the source address for the control +connection to a server. +.It Fl b Ar address +Address where the proxy will listen for redirected control connections. +The default is 127.0.0.1, or ::1 in IPv6 mode. +.It Fl D Ar level +Debug level, ranging from 0 to 7. +Higher is more verbose. +The default is 5. 
+(These levels correspond to the +.Xr syslog 3 +levels.) +.It Fl d +Do not daemonize. +The process will stay in the foreground, logging to standard error. +.It Fl m Ar maxsessions +Maximum number of concurrent FTP sessions. +When the proxy reaches this limit, new connections are denied. +The default is 100 sessions. +The limit can be lowered to a minimum of 1, or raised to a maximum of 500. +.It Fl P Ar port +Fixed server port. +Only used in combination with +.Fl R . +The default is port 21. +.It Fl p Ar port +Port where the proxy will listen for redirected connections. +The default is port 8021. +.It Fl q Ar queue +Create rules with queue +.Ar queue +appended, so that data connections can be queued. +.It Fl R Ar address +Fixed server address, also known as reverse mode. +The proxy will always connect to the same server, regardless of +where the client wanted to connect to (before it was redirected). +Use this option to proxy for a server behind NAT, or to forward all +connections to another proxy. .It Fl r -Use reverse host -.Pq reverse DNS -lookups for logging and libwrap use. -By default, -the proxy does not look up hostnames for libwrap or logging purposes. -.It Fl S Ar address -Source address to use for data connections made by the proxy. -Useful when there are multiple addresses (aliases) available -to the proxy. -Clients may expect data connections to have the same source -address as the control connections, and reject or drop other -connections. +Rewrite sourceport to 20 in active mode to suit ancient clients that insist +on this RFC property. .It Fl t Ar timeout -Specifies a timeout, in seconds. -The proxy will exit and close open connections if it sees no data -for the duration of the timeout. -The default is 0, which means the proxy will not time out. -.It Fl u Ar user -Specify the named user to drop privilege to, after doing -.Xr pf 4 -lookups which require root privilege. -By default, -.Nm -drops privilege to the user -.Em proxy . 
-.Pp -Running as root means that the source of data connections the proxy makes -for PORT and EPRT will be the RFC mandated port 20. -When running as a non-root user, the source of the data connections from -.Nm -will be chosen randomly from the range -.Ar minport -to -.Ar maxport -as described above. -.It Fl V -Be verbose. -With this option the proxy logs the control commands -sent by clients and the replies sent by the servers to -.Xr syslogd 8 . -.It Fl w -Use the tcp wrapper access control library -.Xr hosts_access 3 , -allowing connections to be allowed or denied based on the tcp wrapper's -.Xr hosts.allow 5 -and -.Xr hosts.deny 5 -files. -The proxy does libwrap operations after determining the destination -of the captured control connection, so that tcp wrapper rules may -be written based on the destination as well as the source of FTP connections. +Number of seconds that the control connection can be idle, before the +proxy will disconnect. +The maximum is 86400 seconds, which is also the default. +Do not set this too low, because the control connection is usually +idle when large data transfers are taking place. +.It Fl v +Set the 'log' flag on pf rules committed by +.Nm . +Use twice to set the 'log-all' flag. +The pf rules do not log by default. .El -.Pp -.Nm ftp-proxy -is run from -.Xr inetd 8 -and requires that FTP connections are redirected to it using a -.Em rdr -rule. -A typical way to do this would be to use a +.Sh CONFIGURATION +To make use of the proxy, .Xr pf.conf 5 -rule such as -.Bd -literal -offset 2n -int_if = \&"xl0\&" -rdr pass on $int_if proto tcp from any to any port 21 -> 127.0.0.1 port 8021 -.Ed +needs the following rules. +All anchors are mandatory. +Adjust the rules as needed. 
.Pp -.Xr inetd 8 -must then be configured to run -.Nm -on the port from above using +In the NAT section: .Bd -literal -offset 2n -127.0.0.1:8021 stream tcp nowait root /usr/libexec/ftp-proxy ftp-proxy +nat-anchor "ftp-proxy/*" +rdr-anchor "ftp-proxy/*" +rdr pass on $int_if proto tcp from $lan to any port 21 -> \e + 127.0.0.1 port 8021 .Ed .Pp -in -.Xr inetd.conf 5 . -.Pp -.Nm -accepts the redirected control connections and forwards them -to the server. -The proxy replaces the address and port number that the client -sends through the control connection to the server with its own -address and proxy port, where it listens for the data connection. -When the server opens the data connection back to this port, the -proxy forwards it to the client. -The -.Xr pf.conf 5 -rules need to let pass connections to these proxy ports -(see options -.Fl u , m , -and -.Fl M -above) in on the external interface. -The following example allows only ports 49152 to 65535 to pass in -statefully: -.Bd -literal -offset indent -block in on $ext_if proto tcp all -pass in on $ext_if inet proto tcp from any to $ext_if \e - port > 49151 keep state -.Ed -.Pp -Alternatively, rules can make use of the fact that by default, -.Nm -runs as user -.Qq proxy -to allow the backchannel connections, as in the following example: -.Bd -literal -offset indent -block in on $ext_if proto tcp all -pass in on $ext_if inet proto tcp from any to $ext_if \e - user proxy keep state +In the rule section: +.Bd -literal -offset 2n +anchor "ftp-proxy/*" +pass out proto tcp from $proxy to any port 21 .Ed -.Pp -These examples do not cover the connections from the proxy to the -foreign FTP server. -If one does not pass outgoing connections by default additional rules -are needed. 
.Sh SEE ALSO .Xr ftp 1 , .Xr pf 4 , -.Xr hosts.allow 5 , -.Xr hosts.deny 5 , -.Xr inetd.conf 5 , -.Xr pf.conf 5 , -.Xr inetd 8 , -.Xr pfctl 8 , -.Xr syslogd 8 -.Sh BUGS -Extended Passive mode -.Pq EPSV -is not supported by the proxy and will not work unless the proxy is run -in network address translation mode. -When not in network address translation mode, the proxy returns an error -to the client, hopefully forcing the client to revert to passive mode -.Pq PASV -which is supported. -EPSV will work in network address translation mode, assuming a .Xr pf.conf 5 -setup which allows the EPSV connections through to their destinations. +.Sh CAVEATS +.Xr pf 4 +does not allow the ruleset to be modified if the system is running at a +.Xr securelevel 7 +higher than 1. +At that level +.Nm ftp-proxy +cannot add rules to the anchors and FTP data connections may get blocked. +.Pp +Negotiated data connection ports below 1024 are not allowed. .Pp -IPv6 is not yet supported. +The negotiated IP address for active modes is ignored for security +reasons. +This makes third party file transfers impossible. +.Pp +.Nm ftp-proxy +chroots to "/var/empty" and changes to user "proxy" to drop privileges. diff --git a/contrib/pf/ftp-proxy/ftp-proxy.c b/contrib/pf/ftp-proxy/ftp-proxy.c index dd0c654..99e4174 100644 --- a/contrib/pf/ftp-proxy/ftp-proxy.c +++ b/contrib/pf/ftp-proxy/ftp-proxy.c @@ -1,84 +1,36 @@ -/* $OpenBSD: ftp-proxy.c,v 1.41 2005/03/05 23:11:19 cloder Exp $ */ +/* $OpenBSD: ftp-proxy.c,v 1.13 2006/12/30 13:24:00 camield Exp $ */ /* - * Copyright (c) 1996-2001 - * Obtuse Systems Corporation. All rights reserved. + * Copyright (c) 2004, 2005 Camiel Dobbelaar, <cd@sentia.nl> * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the Obtuse Systems nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY OBTUSE SYSTEMS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL OBTUSE SYSTEMS CORPORATION OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -/* - * ftp proxy, Originally based on juniper_ftp_proxy from the Obtuse - * Systems juniper firewall, written by Dan Boulet <danny@obtuse.com> - * and Bob Beck <beck@obtuse.com> - * - * This version basically passes everything through unchanged except - * for the PORT and the * "227 Entering Passive Mode" reply. - * - * A PORT command is handled by noting the IP address and port number - * specified and then configuring a listen port on some very high port - * number and telling the server about it using a PORT message. - * We then watch for an in-bound connection on the port from the server - * and connect to the client's port when it happens. 
- * - * A "227 Entering Passive Mode" reply is handled by noting the IP address - * and port number specified and then configuring a listen port on some - * very high port number and telling the client about it using a - * "227 Entering Passive Mode" reply. - * We then watch for an in-bound connection on the port from the client - * and connect to the server's port when it happens. - * - * supports tcp wrapper lookups/access control with the -w flag using - * the real destination address - the tcp wrapper stuff is done after - * the real destination address is retrieved from pf + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -/* - * TODO: - * Plenty, this is very basic, with the idea to get it in clean first. - * - * - IPv6 and EPASV support - * - Content filter support - * - filename filter support - * - per-user rules perhaps. 
- */ - -#include <sys/param.h> +#include <sys/queue.h> +#include <sys/types.h> #include <sys/time.h> +#include <sys/resource.h> #include <sys/socket.h> #include <net/if.h> +#include <net/pfvar.h> #include <netinet/in.h> - #include <arpa/inet.h> -#include <ctype.h> +#include <err.h> #include <errno.h> -#include <grp.h> +#include <event.h> +#include <fcntl.h> #include <netdb.h> #include <pwd.h> #include <signal.h> @@ -86,1288 +38,1056 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sysexits.h> #include <syslog.h> #include <unistd.h> - -#include "util.h" - -#ifdef LIBWRAP -#include <tcpd.h> -int allow_severity = LOG_INFO; -int deny_severity = LOG_NOTICE; -#endif /* LIBWRAP */ - -int min_port = IPPORT_HIFIRSTAUTO; -int max_port = IPPORT_HILASTAUTO; - -#define STARTBUFSIZE 1024 /* Must be at least 3 */ - -/* - * Variables used to support PORT mode connections. - * - * This gets a bit complicated. - * - * If PORT mode is on then client_listen_sa describes the socket that - * the real client is listening on and server_listen_sa describes the - * socket that we are listening on (waiting for the real server to connect - * with us). - * - * If PASV mode is on then client_listen_sa describes the socket that - * we are listening on (waiting for the real client to connect to us on) - * and server_listen_sa describes the socket that the real server is - * listening on. - * - * If the socket we are listening on gets a connection then we connect - * to the other side's socket. Similarly, if a connected socket is - * shutdown then we shutdown the other side's socket. 
- */ - -double xfer_start_time; - -struct sockaddr_in real_server_sa; -struct sockaddr_in client_listen_sa; -struct sockaddr_in server_listen_sa; -struct sockaddr_in proxy_sa; -struct in_addr src_addr; - -int client_listen_socket = -1; /* Only used in PASV mode */ -int client_data_socket = -1; /* Connected socket to real client */ -int server_listen_socket = -1; /* Only used in PORT mode */ -int server_data_socket = -1; /* Connected socket to real server */ -int client_data_bytes, server_data_bytes; - -int AnonFtpOnly; -int Verbose; -int NatMode; -int ReverseMode; - -char ClientName[NI_MAXHOST]; -char RealServerName[NI_MAXHOST]; -char OurName[NI_MAXHOST]; - -const char *User = "proxy"; -const char *Group; - -extern int Debug_Level; -extern int Use_Rdns; -extern in_addr_t Bind_Addr; +#include <vis.h> + +#include "filter.h" + +#define CONNECT_TIMEOUT 30 +#define MIN_PORT 1024 +#define MAX_LINE 500 +#define MAX_LOGLINE 300 +#define NTOP_BUFS 3 +#define TCP_BACKLOG 10 + +#define CHROOT_DIR "/var/empty" +#define NOPRIV_USER "proxy" + +/* pfctl standard NAT range. 
*/ +#define PF_NAT_PROXY_PORT_LOW 50001 +#define PF_NAT_PROXY_PORT_HIGH 65535 + +#define sstosa(ss) ((struct sockaddr *)(ss)) + +enum { CMD_NONE = 0, CMD_PORT, CMD_EPRT, CMD_PASV, CMD_EPSV }; + +struct session { + u_int32_t id; + struct sockaddr_storage client_ss; + struct sockaddr_storage proxy_ss; + struct sockaddr_storage server_ss; + struct sockaddr_storage orig_server_ss; + struct bufferevent *client_bufev; + struct bufferevent *server_bufev; + int client_fd; + int server_fd; + char cbuf[MAX_LINE]; + size_t cbuf_valid; + char sbuf[MAX_LINE]; + size_t sbuf_valid; + int cmd; + u_int16_t port; + u_int16_t proxy_port; + LIST_ENTRY(session) entry; +}; + +LIST_HEAD(, session) sessions = LIST_HEAD_INITIALIZER(sessions); + +void client_error(struct bufferevent *, short, void *); +int client_parse(struct session *s); +int client_parse_anon(struct session *s); +int client_parse_cmd(struct session *s); +void client_read(struct bufferevent *, void *); +int drop_privs(void); +void end_session(struct session *); +int exit_daemon(void); +int getline(char *, size_t *); +void handle_connection(const int, short, void *); +void handle_signal(int, short, void *); +struct session * init_session(void); +void logmsg(int, const char *, ...); +u_int16_t parse_port(int); +u_int16_t pick_proxy_port(void); +void proxy_reply(int, struct sockaddr *, u_int16_t); +void server_error(struct bufferevent *, short, void *); +int server_parse(struct session *s); +void server_read(struct bufferevent *, void *); +const char *sock_ntop(struct sockaddr *); +void usage(void); + +char linebuf[MAX_LINE + 1]; +size_t linelen; + +char ntop_buf[NTOP_BUFS][INET6_ADDRSTRLEN]; + +struct sockaddr_storage fixed_server_ss, fixed_proxy_ss; +char *fixed_server, *fixed_server_port, *fixed_proxy, *listen_ip, *listen_port, + *qname; +int anonymous_only, daemonize, id_count, ipv6_mode, loglevel, max_sessions, + rfc_mode, session_count, timeout, verbose; extern char *__progname; -typedef enum { - UNKNOWN_MODE, - 
PORT_MODE, - PASV_MODE, - EPRT_MODE, - EPSV_MODE -} connection_mode_t; - -connection_mode_t connection_mode; - -extern void debuglog(int debug_level, const char *fmt, ...); -double wallclock_time(void); -void show_xfer_stats(void); -void log_control_command (char *cmd, int client); -int new_dataconn(int server); -void do_client_cmd(struct csiob *client, struct csiob *server); -void do_server_reply(struct csiob *server, struct csiob *client); -static void -usage(void) +void +client_error(struct bufferevent *bufev, short what, void *arg) { - syslog(LOG_NOTICE, - "usage: %s [-AnrVw] [-a address] [-D debuglevel] [-g group]" - " [-M maxport] [-m minport] [-R address[:port]] [-S address]" - " [-t timeout] [-u user]", __progname); - exit(EX_USAGE); + struct session *s = arg; + + if (what & EVBUFFER_EOF) + logmsg(LOG_INFO, "#%d client close", s->id); + else if (what == (EVBUFFER_ERROR | EVBUFFER_READ)) + logmsg(LOG_ERR, "#%d client reset connection", s->id); + else if (what & EVBUFFER_TIMEOUT) + logmsg(LOG_ERR, "#%d client timeout", s->id); + else if (what & EVBUFFER_WRITE) + logmsg(LOG_ERR, "#%d client write error: %d", s->id, what); + else + logmsg(LOG_ERR, "#%d abnormal client error: %d", s->id, what); + + end_session(s); } -static void -close_client_data(void) +int +client_parse(struct session *s) { - if (client_data_socket >= 0) { - shutdown(client_data_socket, 2); - close(client_data_socket); - client_data_socket = -1; - } + /* Reset any previous command. */ + s->cmd = CMD_NONE; + s->port = 0; + + /* Commands we are looking for are at least 4 chars long. 
*/ + if (linelen < 4) + return (1); + + if (linebuf[0] == 'P' || linebuf[0] == 'p' || + linebuf[0] == 'E' || linebuf[0] == 'e') + return (client_parse_cmd(s)); + + if (anonymous_only && (linebuf[0] == 'U' || linebuf[0] == 'u')) + return (client_parse_anon(s)); + + return (1); } -static void -close_server_data(void) +int +client_parse_anon(struct session *s) { - if (server_data_socket >= 0) { - shutdown(server_data_socket, 2); - close(server_data_socket); - server_data_socket = -1; + if (strcasecmp("USER ftp\r\n", linebuf) != 0 && + strcasecmp("USER anonymous\r\n", linebuf) != 0) { + snprintf(linebuf, sizeof linebuf, + "500 Only anonymous FTP allowed\r\n"); + logmsg(LOG_DEBUG, "#%d proxy: %s", s->id, linebuf); + + /* Talk back to the client ourself. */ + linelen = strlen(linebuf); + bufferevent_write(s->client_bufev, linebuf, linelen); + + /* Clear buffer so it's not sent to the server. */ + linebuf[0] = '\0'; + linelen = 0; } + + return (1); } -static void -drop_privs(void) +int +client_parse_cmd(struct session *s) { - struct passwd *pw; - struct group *gr; - uid_t uid = 0; - gid_t gid = 0; - - if (User != NULL) { - pw = getpwnam(User); - if (pw == NULL) { - syslog(LOG_ERR, "cannot find user %s", User); - exit(EX_USAGE); - } - uid = pw->pw_uid; - gid = pw->pw_gid; - } + if (strncasecmp("PASV", linebuf, 4) == 0) + s->cmd = CMD_PASV; + else if (strncasecmp("PORT ", linebuf, 5) == 0) + s->cmd = CMD_PORT; + else if (strncasecmp("EPSV", linebuf, 4) == 0) + s->cmd = CMD_EPSV; + else if (strncasecmp("EPRT ", linebuf, 5) == 0) + s->cmd = CMD_EPRT; + else + return (1); - if (Group != NULL) { - gr = getgrnam(Group); - if (gr == NULL) { - syslog(LOG_ERR, "cannot find group %s", Group); - exit(EX_USAGE); - } - gid = gr->gr_gid; + if (ipv6_mode && (s->cmd == CMD_PASV || s->cmd == CMD_PORT)) { + logmsg(LOG_CRIT, "PASV and PORT not allowed with IPv6"); + return (0); } - if (gid != 0 && (setegid(gid) == -1 || setgid(gid) == -1)) { - syslog(LOG_ERR, "cannot drop group privs (%m)"); 
- exit(EX_CONFIG); + if (s->cmd == CMD_PORT || s->cmd == CMD_EPRT) { + s->port = parse_port(s->cmd); + if (s->port < MIN_PORT) { + logmsg(LOG_CRIT, "#%d bad port in '%s'", s->id, + linebuf); + return (0); + } + s->proxy_port = pick_proxy_port(); + proxy_reply(s->cmd, sstosa(&s->proxy_ss), s->proxy_port); + logmsg(LOG_DEBUG, "#%d proxy: %s", s->id, linebuf); } - if (uid != 0 && (seteuid(uid) == -1 || setuid(uid) == -1)) { - syslog(LOG_ERR, "cannot drop root privs (%m)"); - exit(EX_CONFIG); - } + return (1); } -#ifdef LIBWRAP -/* - * Check a connection against the tcpwrapper, log if we're going to - * reject it, returns: 0 -> reject, 1 -> accept. We add in hostnames - * if we are set to do reverse DNS, otherwise no. - */ -static int -check_host(struct sockaddr_in *client_sin, struct sockaddr_in *server_sin) +void +client_read(struct bufferevent *bufev, void *arg) { - char cname[NI_MAXHOST]; - char sname[NI_MAXHOST]; - struct request_info request; - int i; - - request_init(&request, RQ_DAEMON, __progname, RQ_CLIENT_SIN, - client_sin, RQ_SERVER_SIN, server_sin, RQ_CLIENT_ADDR, - inet_ntoa(client_sin->sin_addr), 0); - - if (Use_Rdns) { - /* - * We already looked these up, but we have to do it again - * for tcp wrapper, to ensure that we get the DNS name, since - * the tcp wrapper cares about these things, and we don't - * want to pass in a printed address as a name. - */ - i = getnameinfo((struct sockaddr *) &client_sin->sin_addr, - sizeof(&client_sin->sin_addr), cname, sizeof(cname), - NULL, 0, NI_NAMEREQD); - - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) - strlcpy(cname, STRING_UNKNOWN, sizeof(cname)); - - i = getnameinfo((struct sockaddr *)&server_sin->sin_addr, - sizeof(&server_sin->sin_addr), sname, sizeof(sname), - NULL, 0, NI_NAMEREQD); - - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) - strlcpy(sname, STRING_UNKNOWN, sizeof(sname)); - } else { - /* - * ensure the TCP wrapper doesn't start doing - * reverse DNS lookups if we aren't supposed to. 
- */ - strlcpy(cname, STRING_UNKNOWN, sizeof(cname)); - strlcpy(sname, STRING_UNKNOWN, sizeof(sname)); - } + struct session *s = arg; + size_t buf_avail, read; + int n; - request_set(&request, RQ_SERVER_ADDR, inet_ntoa(server_sin->sin_addr), - 0); - request_set(&request, RQ_CLIENT_NAME, cname, RQ_SERVER_NAME, sname, 0); + do { + buf_avail = sizeof s->cbuf - s->cbuf_valid; + read = bufferevent_read(bufev, s->cbuf + s->cbuf_valid, + buf_avail); + s->cbuf_valid += read; + + while ((n = getline(s->cbuf, &s->cbuf_valid)) > 0) { + logmsg(LOG_DEBUG, "#%d client: %s", s->id, linebuf); + if (!client_parse(s)) { + end_session(s); + return; + } + bufferevent_write(s->server_bufev, linebuf, linelen); + } - if (!hosts_access(&request)) { - syslog(LOG_NOTICE, "tcpwrappers rejected: %s -> %s", - ClientName, RealServerName); - return(0); - } - return(1); + if (n == -1) { + logmsg(LOG_ERR, "#%d client command too long or not" + " clean", s->id); + end_session(s); + return; + } + } while (read == buf_avail); } -#endif /* LIBWRAP */ -double -wallclock_time(void) +int +drop_privs(void) { - struct timeval tv; + struct passwd *pw; + + pw = getpwnam(NOPRIV_USER); + if (pw == NULL) + return (0); - gettimeofday(&tv, NULL); - return(tv.tv_sec + tv.tv_usec / 1e6); + tzset(); + if (chroot(CHROOT_DIR) != 0 || chdir("/") != 0 || + setgroups(1, &pw->pw_gid) != 0 || + setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) != 0 || + setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) != 0) + return (0); + + return (1); } -/* - * Show the stats for this data transfer - */ void -show_xfer_stats(void) +end_session(struct session *s) { - char tbuf[1000]; - double delta; - size_t len; - int i = -1; - - if (!Verbose) - return; - - delta = wallclock_time() - xfer_start_time; - - if (delta < 0.001) - delta = 0.001; - - if (client_data_bytes == 0 && server_data_bytes == 0) { - syslog(LOG_INFO, - "data transfer complete (no bytes transferred)"); - return; + int err; + + logmsg(LOG_INFO, "#%d ending session", s->id); + + 
if (s->client_fd != -1) + close(s->client_fd); + if (s->server_fd != -1) + close(s->server_fd); + + if (s->client_bufev) + bufferevent_free(s->client_bufev); + if (s->server_bufev) + bufferevent_free(s->server_bufev); + + /* Remove rulesets by commiting empty ones. */ + err = 0; + if (prepare_commit(s->id) == -1) + err = errno; + else if (do_commit() == -1) { + err = errno; + do_rollback(); } + if (err) + logmsg(LOG_ERR, "#%d pf rule removal failed: %s", s->id, + strerror(err)); - len = sizeof(tbuf); - - if (delta >= 60) { - int idelta; + LIST_REMOVE(s, entry); + free(s); + session_count--; +} - idelta = delta + 0.5; - if (idelta >= 60*60) { - i = snprintf(tbuf, len, - "data transfer complete (%dh %dm %ds", - idelta / (60*60), (idelta % (60*60)) / 60, - idelta % 60); - if (i == -1 || i >= len) - goto logit; - len -= i; - } else { - i = snprintf(tbuf, len, - "data transfer complete (%dm %ds", idelta / 60, - idelta % 60); - if (i == -1 || i >= len) - goto logit; - len -= i; - } - } else { - i = snprintf(tbuf, len, "data transfer complete (%.1fs", - delta); - if (i == -1 || i >= len) - goto logit; - len -= i; - } +int +exit_daemon(void) +{ + struct session *s, *next; - if (client_data_bytes > 0) { - i = snprintf(&tbuf[strlen(tbuf)], len, - ", %d bytes to server) (%.1fKB/s", client_data_bytes, - (client_data_bytes / delta) / (double)1024); - if (i == -1 || i >= len) - goto logit; - len -= i; - } - if (server_data_bytes > 0) { - i = snprintf(&tbuf[strlen(tbuf)], len, - ", %d bytes to client) (%.1fKB/s", server_data_bytes, - (server_data_bytes / delta) / (double)1024); - if (i == -1 || i >= len) - goto logit; - len -= i; + for (s = LIST_FIRST(&sessions); s != LIST_END(&sessions); s = next) { + next = LIST_NEXT(s, entry); + end_session(s); } - strlcat(tbuf, ")", sizeof(tbuf)); - logit: - if (i != -1) - syslog(LOG_INFO, "%s", tbuf); -} -void -log_control_command (char *cmd, int client) -{ - /* log an ftp control command or reply */ - const char *logstring; - int level = 
LOG_DEBUG; + if (daemonize) + closelog(); - if (!Verbose) - return; + exit(0); - /* don't log passwords */ - if (strncasecmp(cmd, "pass ", 5) == 0) - logstring = "PASS XXXX"; - else - logstring = cmd; - if (client) { - /* log interesting stuff at LOG_INFO, rest at LOG_DEBUG */ - if ((strncasecmp(cmd, "user ", 5) == 0) || - (strncasecmp(cmd, "retr ", 5) == 0) || - (strncasecmp(cmd, "cwd ", 4) == 0) || - (strncasecmp(cmd, "stor " ,5) == 0)) - level = LOG_INFO; - } - syslog(level, "%s %s", client ? "client:" : " server:", - logstring); + /* NOTREACHED */ + return (-1); } -/* - * set ourselves up for a new data connection. Direction is toward client if - * "server" is 0, towards server otherwise. - */ int -new_dataconn(int server) +getline(char *buf, size_t *valid) { - /* - * Close existing data conn. - */ + size_t i; - if (client_listen_socket != -1) { - close(client_listen_socket); - client_listen_socket = -1; + if (*valid > MAX_LINE) + return (-1); + + /* Copy to linebuf while searching for a newline. */ + for (i = 0; i < *valid; i++) { + linebuf[i] = buf[i]; + if (buf[i] == '\0') + return (-1); + if (buf[i] == '\n') + break; } - close_client_data(); - if (server_listen_socket != -1) { - close(server_listen_socket); - server_listen_socket = -1; + if (i == *valid) { + /* No newline found. */ + linebuf[0] = '\0'; + linelen = 0; + if (i < MAX_LINE) + return (0); + return (-1); } - close_server_data(); - if (server) { - bzero(&server_listen_sa, sizeof(server_listen_sa)); - server_listen_socket = get_backchannel_socket(SOCK_STREAM, - min_port, max_port, -1, 1, &server_listen_sa); + linelen = i + 1; + linebuf[linelen] = '\0'; + *valid -= linelen; + + /* Move leftovers to the start. 
*/ + if (*valid != 0) + bcopy(buf + linelen, buf, *valid); - if (server_listen_socket == -1) { - syslog(LOG_INFO, "server socket bind() failed (%m)"); - exit(EX_OSERR); - } - if (listen(server_listen_socket, 5) != 0) { - syslog(LOG_INFO, "server socket listen() failed (%m)"); - exit(EX_OSERR); - } - } else { - bzero(&client_listen_sa, sizeof(client_listen_sa)); - client_listen_socket = get_backchannel_socket(SOCK_STREAM, - min_port, max_port, -1, 1, &client_listen_sa); - - if (client_listen_socket == -1) { - syslog(LOG_NOTICE, - "cannot get client listen socket (%m)"); - exit(EX_OSERR); - } - if (listen(client_listen_socket, 5) != 0) { - syslog(LOG_NOTICE, - "cannot listen on client socket (%m)"); - exit(EX_OSERR); - } - } - return(0); + return ((int)linelen); } -static void -connect_pasv_backchannel(void) +void +handle_connection(const int listen_fd, short event, void *ev) { - struct sockaddr_in listen_sa; - socklen_t salen; + struct sockaddr_storage tmp_ss; + struct sockaddr *client_sa, *server_sa, *fixed_server_sa; + struct sockaddr *client_to_proxy_sa, *proxy_to_server_sa; + struct session *s; + socklen_t len; + int client_fd, fc, on; /* - * We are about to accept a connection from the client. - * This is a PASV data connection. + * We _must_ accept the connection, otherwise libevent will keep + * coming back, and we will chew up all CPU. */ - debuglog(2, "client listen socket ready"); + client_sa = sstosa(&tmp_ss); + len = sizeof(struct sockaddr_storage); + if ((client_fd = accept(listen_fd, client_sa, &len)) < 0) { + logmsg(LOG_CRIT, "accept failed: %s", strerror(errno)); + return; + } - close_server_data(); - close_client_data(); + /* Refuse connection if the maximum is reached. */ + if (session_count >= max_sessions) { + logmsg(LOG_ERR, "client limit (%d) reached, refusing " + "connection from %s", max_sessions, sock_ntop(client_sa)); + close(client_fd); + return; + } + + /* Allocate session and copy back the info from the accept(). 
*/ + s = init_session(); + if (s == NULL) { + logmsg(LOG_CRIT, "init_session failed"); + close(client_fd); + return; + } + s->client_fd = client_fd; + memcpy(sstosa(&s->client_ss), client_sa, client_sa->sa_len); - salen = sizeof(listen_sa); - client_data_socket = accept(client_listen_socket, - (struct sockaddr *)&listen_sa, &salen); + /* Cast it once, and be done with it. */ + client_sa = sstosa(&s->client_ss); + server_sa = sstosa(&s->server_ss); + client_to_proxy_sa = sstosa(&tmp_ss); + proxy_to_server_sa = sstosa(&s->proxy_ss); + fixed_server_sa = sstosa(&fixed_server_ss); - if (client_data_socket < 0) { - syslog(LOG_NOTICE, "accept() failed (%m)"); - exit(EX_OSERR); + /* Log id/client early to ease debugging. */ + logmsg(LOG_DEBUG, "#%d accepted connection from %s", s->id, + sock_ntop(client_sa)); + + /* + * Find out the real server and port that the client wanted. + */ + len = sizeof(struct sockaddr_storage); + if ((getsockname(s->client_fd, client_to_proxy_sa, &len)) < 0) { + logmsg(LOG_CRIT, "#%d getsockname failed: %s", s->id, + strerror(errno)); + goto fail; } - close(client_listen_socket); - client_listen_socket = -1; - memset(&listen_sa, 0, sizeof(listen_sa)); - - server_data_socket = get_backchannel_socket(SOCK_STREAM, min_port, - max_port, -1, 1, &listen_sa); - if (server_data_socket < 0) { - syslog(LOG_NOTICE, "get_backchannel_socket() failed (%m)"); - exit(EX_OSERR); + if (server_lookup(client_sa, client_to_proxy_sa, server_sa) != 0) { + logmsg(LOG_CRIT, "#%d server lookup failed (no rdr?)", s->id); + goto fail; } - if (connect(server_data_socket, (struct sockaddr *) &server_listen_sa, - sizeof(server_listen_sa)) != 0) { - syslog(LOG_NOTICE, "connect() failed (%m)"); - exit(EX_NOHOST); + if (fixed_server) { + memcpy(sstosa(&s->orig_server_ss), server_sa, + server_sa->sa_len); + memcpy(server_sa, fixed_server_sa, fixed_server_sa->sa_len); } - client_data_bytes = 0; - server_data_bytes = 0; - xfer_start_time = wallclock_time(); -} -static void 
-connect_port_backchannel(void) -{ - struct sockaddr_in listen_sa; - socklen_t salen; + /* XXX: check we are not connecting to ourself. */ /* - * We are about to accept a connection from the server. - * This is a PORT or EPRT data connection. + * Setup socket and connect to server. */ - debuglog(2, "server listen socket ready"); - - close_server_data(); - close_client_data(); + if ((s->server_fd = socket(server_sa->sa_family, SOCK_STREAM, + IPPROTO_TCP)) < 0) { + logmsg(LOG_CRIT, "#%d server socket failed: %s", s->id, + strerror(errno)); + goto fail; + } + if (fixed_proxy && bind(s->server_fd, sstosa(&fixed_proxy_ss), + fixed_proxy_ss.ss_len) != 0) { + logmsg(LOG_CRIT, "#%d cannot bind fixed proxy address: %s", + s->id, strerror(errno)); + goto fail; + } - salen = sizeof(listen_sa); - server_data_socket = accept(server_listen_socket, - (struct sockaddr *)&listen_sa, &salen); - if (server_data_socket < 0) { - syslog(LOG_NOTICE, "accept() failed (%m)"); - exit(EX_OSERR); + /* Use non-blocking connect(), see CONNECT_TIMEOUT below. */ + if ((fc = fcntl(s->server_fd, F_GETFL)) == -1 || + fcntl(s->server_fd, F_SETFL, fc | O_NONBLOCK) == -1) { + logmsg(LOG_CRIT, "#%d cannot mark socket non-blocking: %s", + s->id, strerror(errno)); + goto fail; + } + if (connect(s->server_fd, server_sa, server_sa->sa_len) < 0 && + errno != EINPROGRESS) { + logmsg(LOG_CRIT, "#%d proxy cannot connect to server %s: %s", + s->id, sock_ntop(server_sa), strerror(errno)); + goto fail; } - close(server_listen_socket); - server_listen_socket = -1; - - if (getuid() != 0) { - /* - * We're not running as root, so we get a backchannel - * socket bound in our designated range, instead of - * getting one bound to port 20 - This is deliberately - * not RFC compliant. 
- */ - bcopy(&src_addr, &listen_sa.sin_addr, sizeof(struct in_addr)); - client_data_socket = get_backchannel_socket(SOCK_STREAM, - min_port, max_port, -1, 1, &listen_sa); - if (client_data_socket < 0) { - syslog(LOG_NOTICE, "get_backchannel_socket() failed (%m)"); - exit(EX_OSERR); - } - } else { + len = sizeof(struct sockaddr_storage); + if ((getsockname(s->server_fd, proxy_to_server_sa, &len)) < 0) { + logmsg(LOG_CRIT, "#%d getsockname failed: %s", s->id, + strerror(errno)); + goto fail; + } - /* - * We're root, get our backchannel socket bound to port - * 20 here, so we're fully RFC compliant. - */ - client_data_socket = socket(AF_INET, SOCK_STREAM, 0); + logmsg(LOG_INFO, "#%d FTP session %d/%d started: client %s to server " + "%s via proxy %s ", s->id, session_count, max_sessions, + sock_ntop(client_sa), sock_ntop(server_sa), + sock_ntop(proxy_to_server_sa)); - salen = 1; - listen_sa.sin_family = AF_INET; - bcopy(&src_addr, &listen_sa.sin_addr, sizeof(struct in_addr)); - listen_sa.sin_port = htons(20); + /* Keepalive is nice, but don't care if it fails. */ + on = 1; + setsockopt(s->client_fd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, + sizeof on); + setsockopt(s->server_fd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, + sizeof on); - if (setsockopt(client_data_socket, SOL_SOCKET, SO_REUSEADDR, - &salen, sizeof(salen)) == -1) { - syslog(LOG_NOTICE, "setsockopt() failed (%m)"); - exit(EX_OSERR); - } - - if (bind(client_data_socket, (struct sockaddr *)&listen_sa, - sizeof(listen_sa)) == - 1) { - syslog(LOG_NOTICE, "data channel bind() failed (%m)"); - exit(EX_OSERR); - } + /* + * Setup buffered events. 
+ */ + s->client_bufev = bufferevent_new(s->client_fd, &client_read, NULL, + &client_error, s); + if (s->client_bufev == NULL) { + logmsg(LOG_CRIT, "#%d bufferevent_new client failed", s->id); + goto fail; } - - if (connect(client_data_socket, (struct sockaddr *) &client_listen_sa, - sizeof(client_listen_sa)) != 0) { - syslog(LOG_INFO, "cannot connect data channel (%m)"); - exit(EX_NOHOST); + bufferevent_settimeout(s->client_bufev, timeout, 0); + bufferevent_enable(s->client_bufev, EV_READ | EV_TIMEOUT); + + s->server_bufev = bufferevent_new(s->server_fd, &server_read, NULL, + &server_error, s); + if (s->server_bufev == NULL) { + logmsg(LOG_CRIT, "#%d bufferevent_new server failed", s->id); + goto fail; } + bufferevent_settimeout(s->server_bufev, CONNECT_TIMEOUT, 0); + bufferevent_enable(s->server_bufev, EV_READ | EV_TIMEOUT); + + return; - client_data_bytes = 0; - server_data_bytes = 0; - xfer_start_time = wallclock_time(); + fail: + end_session(s); } void -do_client_cmd(struct csiob *client, struct csiob *server) +handle_signal(int sig, short event, void *arg) { - int i, j, rv; - char tbuf[100]; - char *sendbuf = NULL; - - log_control_command((char *)client->line_buffer, 1); - - /* client->line_buffer is an ftp control command. - * There is no reason for these to be very long. - * In the interest of limiting buffer overrun attempts, - * we catch them here. - */ - if (strlen((char *)client->line_buffer) > 512) { - syslog(LOG_NOTICE, "excessively long control command"); - exit(EX_DATAERR); - } - /* - * Check the client user provided if needed + * Signal handler rules don't apply, libevent decouples for us. 
*/ - if (AnonFtpOnly && strncasecmp((char *)client->line_buffer, "user ", - strlen("user ")) == 0) { - char *cp; - - cp = (char *) client->line_buffer + strlen("user "); - if ((strcasecmp(cp, "ftp\r\n") != 0) && - (strcasecmp(cp, "anonymous\r\n") != 0)) { - /* - * this isn't anonymous - give the client an - * error before they send a password - */ - snprintf(tbuf, sizeof(tbuf), - "500 Only anonymous FTP is allowed\r\n"); - j = 0; - i = strlen(tbuf); - do { - rv = send(client->fd, tbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && - errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - sendbuf = NULL; - } else - sendbuf = (char *)client->line_buffer; - } else if ((strncasecmp((char *)client->line_buffer, "eprt ", - strlen("eprt ")) == 0)) { - - /* Watch out for EPRT commands */ - char *line = NULL, *q, *p, *result[3], delim; - struct addrinfo hints, *res = NULL; - unsigned long proto; - - j = 0; - line = strdup((char *)client->line_buffer+strlen("eprt ")); - if (line == NULL) { - syslog(LOG_ERR, "insufficient memory"); - exit(EX_UNAVAILABLE); - } - p = line; - delim = p[0]; - p++; - - memset(result,0, sizeof(result)); - for (i = 0; i < 3; i++) { - q = strchr(p, delim); - if (!q || *q != delim) - goto parsefail; - *q++ = '\0'; - result[i] = p; - p = q; - } - proto = strtoul(result[0], &p, 10); - if (!*result[0] || *p) - goto protounsupp; + logmsg(LOG_ERR, "%s exiting on signal %d", __progname, sig); - memset(&hints, 0, sizeof(hints)); - if (proto != 1) /* 1 == AF_INET - all we support for now */ - goto protounsupp; - hints.ai_family = AF_INET; - hints.ai_socktype = SOCK_STREAM; - hints.ai_flags = AI_NUMERICHOST; /*no DNS*/ - if (getaddrinfo(result[1], result[2], &hints, &res)) - goto parsefail; - if (res->ai_next) - goto parsefail; - if (sizeof(client_listen_sa) < res->ai_addrlen) - goto parsefail; - memcpy(&client_listen_sa, res->ai_addr, res->ai_addrlen); - - debuglog(1, "client wants us to use %s:%u", - 
inet_ntoa(client_listen_sa.sin_addr), - htons(client_listen_sa.sin_port)); - - /* - * Configure our own listen socket and tell the server about it - */ - new_dataconn(1); - connection_mode = EPRT_MODE; - - debuglog(1, "we want server to use %s:%u", - inet_ntoa(server->sa.sin_addr), - ntohs(server_listen_sa.sin_port)); - - snprintf(tbuf, sizeof(tbuf), "EPRT |%d|%s|%u|\r\n", 1, - inet_ntoa(server->sa.sin_addr), - ntohs(server_listen_sa.sin_port)); - debuglog(1, "to server (modified): %s", tbuf); - sendbuf = tbuf; - goto out; -parsefail: - snprintf(tbuf, sizeof(tbuf), - "500 Invalid argument; rejected\r\n"); - sendbuf = NULL; - goto out; -protounsupp: - /* we only support AF_INET for now */ - if (proto == 2) - snprintf(tbuf, sizeof(tbuf), - "522 Protocol not supported, use (1)\r\n"); - else - snprintf(tbuf, sizeof(tbuf), - "501 Protocol not supported\r\n"); - sendbuf = NULL; -out: - if (line) - free(line); - if (res) - freeaddrinfo(res); - if (sendbuf == NULL) { - debuglog(1, "to client (modified): %s", tbuf); - i = strlen(tbuf); - do { - rv = send(client->fd, tbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && - errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - } - } else if (!NatMode && (strncasecmp((char *)client->line_buffer, - "epsv", strlen("epsv")) == 0)) { - - /* - * If we aren't in NAT mode, deal with EPSV. - * EPSV is a problem - Unlike PASV, the reply from the - * server contains *only* a port, we can't modify the reply - * to the client and get the client to connect to us without - * resorting to using a dynamic rdr rule we have to add in - * for the reply to this connection, and take away afterwards. - * so this will wait until we have the right solution for rule - * additions/deletions in pf. - * - * in the meantime we just tell the client we don't do it, - * and most clients should fall back to using PASV. 
- */ - - snprintf(tbuf, sizeof(tbuf), - "500 EPSV command not understood\r\n"); - debuglog(1, "to client (modified): %s", tbuf); - j = 0; - i = strlen(tbuf); - do { - rv = send(client->fd, tbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - sendbuf = NULL; - } else if (strncasecmp((char *)client->line_buffer, "port ", - strlen("port ")) == 0) { - unsigned int values[6]; - char *tailptr; - - debuglog(1, "Got a PORT command"); - - tailptr = (char *)&client->line_buffer[strlen("port ")]; - values[0] = 0; - - i = sscanf(tailptr, "%u,%u,%u,%u,%u,%u", &values[0], - &values[1], &values[2], &values[3], &values[4], - &values[5]); - if (i != 6) { - syslog(LOG_INFO, "malformed PORT command (%s)", - client->line_buffer); - exit(EX_DATAERR); - } - - for (i = 0; i<6; i++) { - if (values[i] > 255) { - syslog(LOG_INFO, - "malformed PORT command (%s)", - client->line_buffer); - exit(EX_DATAERR); - } - } - - client_listen_sa.sin_family = AF_INET; - client_listen_sa.sin_addr.s_addr = htonl((values[0] << 24) | - (values[1] << 16) | (values[2] << 8) | - (values[3] << 0)); - - client_listen_sa.sin_port = htons((values[4] << 8) | - values[5]); - debuglog(1, "client wants us to use %u.%u.%u.%u:%u", - values[0], values[1], values[2], values[3], - (values[4] << 8) | values[5]); - - /* - * Configure our own listen socket and tell the server about it - */ - new_dataconn(1); - connection_mode = PORT_MODE; - - debuglog(1, "we want server to use %s:%u", - inet_ntoa(server->sa.sin_addr), - ntohs(server_listen_sa.sin_port)); - - snprintf(tbuf, sizeof(tbuf), "PORT %u,%u,%u,%u,%u,%u\r\n", - ((u_char *)&server->sa.sin_addr.s_addr)[0], - ((u_char *)&server->sa.sin_addr.s_addr)[1], - ((u_char *)&server->sa.sin_addr.s_addr)[2], - ((u_char *)&server->sa.sin_addr.s_addr)[3], - ((u_char *)&server_listen_sa.sin_port)[0], - ((u_char *)&server_listen_sa.sin_port)[1]); - - debuglog(1, "to server (modified): %s", 
tbuf); - - sendbuf = tbuf; - } else - sendbuf = (char *)client->line_buffer; + exit_daemon(); +} + - /* - *send our (possibly modified) control command in sendbuf - * on it's way to the server - */ - if (sendbuf != NULL) { - j = 0; - i = strlen(sendbuf); - do { - rv = send(server->fd, sendbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - } +struct session * +init_session(void) +{ + struct session *s; + + s = calloc(1, sizeof(struct session)); + if (s == NULL) + return (NULL); + + s->id = id_count++; + s->client_fd = -1; + s->server_fd = -1; + s->cbuf[0] = '\0'; + s->cbuf_valid = 0; + s->sbuf[0] = '\0'; + s->sbuf_valid = 0; + s->client_bufev = NULL; + s->server_bufev = NULL; + s->cmd = CMD_NONE; + s->port = 0; + + LIST_INSERT_HEAD(&sessions, s, entry); + session_count++; + + return (s); } void -do_server_reply(struct csiob *server, struct csiob *client) +logmsg(int pri, const char *message, ...) { - int code, i, j, rv; - struct in_addr *iap; - static int continuing = 0; - char tbuf[100], *sendbuf, *p; - - log_control_command((char *)server->line_buffer, 0); - - if (strlen((char *)server->line_buffer) > 512) { - /* - * someone's playing games. Have a cow in the syslogs and - * exit - we don't pass this on for fear of hurting - * our other end, which might be poorly implemented. - */ - syslog(LOG_NOTICE, "long FTP control reply"); - exit(EX_DATAERR); - } + va_list ap; - /* - * Watch out for "227 Entering Passive Mode ..." 
replies - */ - code = strtol((char *)server->line_buffer, &p, 10); - if (isspace(server->line_buffer[0])) - code = 0; - if (!*(server->line_buffer) || (*p != ' ' && *p != '-')) { - if (continuing) - goto sendit; - syslog(LOG_INFO, "malformed control reply"); - exit(EX_DATAERR); - } - if (code <= 0 || code > 999) { - if (continuing) - goto sendit; - syslog(LOG_INFO, "invalid server reply code %d", code); - exit(EX_DATAERR); - } - if (*p == '-') - continuing = 1; - else - continuing = 0; - if (code == 227 && !NatMode) { - unsigned int values[6]; - char *tailptr; - - debuglog(1, "Got a PASV reply"); - debuglog(1, "{%s}", (char *)server->line_buffer); - - tailptr = (char *)strchr((char *)server->line_buffer, '('); - if (tailptr == NULL) { - tailptr = strrchr((char *)server->line_buffer, ' '); - if (tailptr == NULL) { - syslog(LOG_NOTICE, "malformed 227 reply"); - exit(EX_DATAERR); - } - } - tailptr++; /* skip past space or ( */ + if (pri > loglevel) + return; - values[0] = 0; + va_start(ap, message); - i = sscanf(tailptr, "%u,%u,%u,%u,%u,%u", &values[0], - &values[1], &values[2], &values[3], &values[4], - &values[5]); - if (i != 6) { - syslog(LOG_INFO, "malformed PASV reply (%s)", - client->line_buffer); - exit(EX_DATAERR); - } - for (i = 0; i<6; i++) - if (values[i] > 255) { - syslog(LOG_INFO, "malformed PASV reply(%s)", - client->line_buffer); - exit(EX_DATAERR); - } + if (daemonize) + /* syslog does its own vissing. 
*/ + vsyslog(pri, message, ap); + else { + char buf[MAX_LOGLINE]; + char visbuf[2 * MAX_LOGLINE]; - server_listen_sa.sin_family = AF_INET; - server_listen_sa.sin_addr.s_addr = htonl((values[0] << 24) | - (values[1] << 16) | (values[2] << 8) | (values[3] << 0)); - server_listen_sa.sin_port = htons((values[4] << 8) | - values[5]); - - debuglog(1, "server wants us to use %s:%u", - inet_ntoa(server_listen_sa.sin_addr), (values[4] << 8) | - values[5]); - - new_dataconn(0); - connection_mode = PASV_MODE; - if (ReverseMode) - iap = &(proxy_sa.sin_addr); - else - iap = &(server->sa.sin_addr); - - debuglog(1, "we want client to use %s:%u", inet_ntoa(*iap), - htons(client_listen_sa.sin_port)); - - snprintf(tbuf, sizeof(tbuf), - "227 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n", - ((u_char *)iap)[0], ((u_char *)iap)[1], - ((u_char *)iap)[2], ((u_char *)iap)[3], - ((u_char *)&client_listen_sa.sin_port)[0], - ((u_char *)&client_listen_sa.sin_port)[1]); - debuglog(1, "to client (modified): %s", tbuf); - sendbuf = tbuf; - } else { - sendit: - sendbuf = (char *)server->line_buffer; + /* We don't care about truncation. 
*/ + vsnprintf(buf, sizeof buf, message, ap); + strnvis(visbuf, buf, sizeof visbuf, VIS_CSTYLE | VIS_NL); + fprintf(stderr, "%s\n", visbuf); } - /* - * send our (possibly modified) control command in sendbuf - * on it's way to the client - */ - j = 0; - i = strlen(sendbuf); - do { - rv = send(client->fd, sendbuf + j, i - j, 0); - if (rv == -1 && errno != EAGAIN && errno != EINTR) - break; - else if (rv != -1) - j += rv; - } while (j >= 0 && j < i); - + va_end(ap); } int main(int argc, char *argv[]) { - struct csiob client_iob, server_iob; - struct sigaction new_sa, old_sa; - int sval, ch, flags, i; - socklen_t salen; - int one = 1; - long timeout_seconds = 0; - struct timeval tv; -#ifdef LIBWRAP - int use_tcpwrapper = 0; -#endif /* LIBWRAP */ - - while ((ch = getopt(argc, argv, "a:D:g:m:M:R:S:t:u:AnVwr")) != -1) { - char *p; + struct rlimit rlp; + struct addrinfo hints, *res; + struct event ev, ev_sighup, ev_sigint, ev_sigterm; + int ch, error, listenfd, on; + const char *errstr; + + /* Defaults. */ + anonymous_only = 0; + daemonize = 1; + fixed_proxy = NULL; + fixed_server = NULL; + fixed_server_port = "21"; + ipv6_mode = 0; + listen_ip = NULL; + listen_port = "8021"; + loglevel = LOG_NOTICE; + max_sessions = 100; + qname = NULL; + rfc_mode = 0; + timeout = 24 * 3600; + verbose = 0; + + /* Other initialization. 
*/ + id_count = 1; + session_count = 0; + + while ((ch = getopt(argc, argv, "6Aa:b:D:dm:P:p:q:R:rt:v")) != -1) { switch (ch) { - case 'a': - if (!*optarg) - usage(); - if ((Bind_Addr = inet_addr(optarg)) == INADDR_NONE) { - syslog(LOG_NOTICE, - "%s: invalid address", optarg); - usage(); - } + case '6': + ipv6_mode = 1; break; case 'A': - AnonFtpOnly = 1; /* restrict to anon usernames only */ + anonymous_only = 1; + break; + case 'a': + fixed_proxy = optarg; + break; + case 'b': + listen_ip = optarg; break; case 'D': - Debug_Level = strtol(optarg, &p, 10); - if (!*optarg || *p) - usage(); + loglevel = strtonum(optarg, LOG_EMERG, LOG_DEBUG, + &errstr); + if (errstr) + errx(1, "loglevel %s", errstr); break; - case 'g': - Group = optarg; + case 'd': + daemonize = 0; break; case 'm': - min_port = strtol(optarg, &p, 10); - if (!*optarg || *p) - usage(); - if (min_port < 0 || min_port > USHRT_MAX) - usage(); + max_sessions = strtonum(optarg, 1, 500, &errstr); + if (errstr) + errx(1, "max sessions %s", errstr); break; - case 'M': - max_port = strtol(optarg, &p, 10); - if (!*optarg || *p) - usage(); - if (max_port < 0 || max_port > USHRT_MAX) - usage(); + case 'P': + fixed_server_port = optarg; break; - case 'n': - NatMode = 1; /* pass all passives, we're using NAT */ + case 'p': + listen_port = optarg; break; - case 'r': - Use_Rdns = 1; /* look up hostnames */ + case 'q': + if (strlen(optarg) >= PF_QNAME_SIZE) + errx(1, "queuename too long"); + qname = optarg; break; - case 'R': { - char *s, *t; - - if (!*optarg) - usage(); - if ((s = strdup(optarg)) == NULL) { - syslog (LOG_NOTICE, - "Insufficient memory (malloc failed)"); - exit(EX_UNAVAILABLE); - } - memset(&real_server_sa, 0, sizeof(real_server_sa)); - real_server_sa.sin_len = sizeof(struct sockaddr_in); - real_server_sa.sin_family = AF_INET; - t = strchr(s, ':'); - if (t == NULL) - real_server_sa.sin_port = htons(21); - else { - long port = strtol(t + 1, &p, 10); - - if (*p || port <= 0 || port > 65535) - usage(); - 
real_server_sa.sin_port = htons(port); - *t = 0; - } - real_server_sa.sin_addr.s_addr = inet_addr(s); - if (real_server_sa.sin_addr.s_addr == INADDR_NONE) - usage(); - free(s); - ReverseMode = 1; + case 'R': + fixed_server = optarg; break; - } - case 'S': - if (!inet_aton(optarg, &src_addr)) - usage(); + case 'r': + rfc_mode = 1; break; case 't': - timeout_seconds = strtol(optarg, &p, 10); - if (!*optarg || *p) - usage(); - break; - case 'u': - User = optarg; + timeout = strtonum(optarg, 0, 86400, &errstr); + if (errstr) + errx(1, "timeout %s", errstr); break; - case 'V': - Verbose = 1; - break; -#ifdef LIBWRAP - case 'w': - use_tcpwrapper = 1; /* do the libwrap thing */ + case 'v': + verbose++; + if (verbose > 2) + usage(); break; -#endif /* LIBWRAP */ default: usage(); - /* NOTREACHED */ } } - argc -= optind; - argv += optind; - if (max_port < min_port) - usage(); + if (listen_ip == NULL) + listen_ip = ipv6_mode ? "::1" : "127.0.0.1"; - openlog(__progname, LOG_NDELAY|LOG_PID, LOG_DAEMON); + /* Check for root to save the user from cryptic failure messages. */ + if (getuid() != 0) + errx(1, "needs to start as root"); - setlinebuf(stdout); - setlinebuf(stderr); + /* Raise max. open files limit to satisfy max. sessions. */ + rlp.rlim_cur = rlp.rlim_max = (2 * max_sessions) + 10; + if (setrlimit(RLIMIT_NOFILE, &rlp) == -1) + err(1, "setrlimit"); - memset(&client_iob, 0, sizeof(client_iob)); - memset(&server_iob, 0, sizeof(server_iob)); - - if (get_proxy_env(0, &real_server_sa, &client_iob.sa, - &proxy_sa) == -1) - exit(EX_PROTOCOL); + if (fixed_proxy) { + memset(&hints, 0, sizeof hints); + hints.ai_flags = AI_NUMERICHOST; + hints.ai_family = ipv6_mode ? 
AF_INET6 : AF_INET; + hints.ai_socktype = SOCK_STREAM; + error = getaddrinfo(fixed_proxy, NULL, &hints, &res); + if (error) + errx(1, "getaddrinfo fixed proxy address failed: %s", + gai_strerror(error)); + memcpy(&fixed_proxy_ss, res->ai_addr, res->ai_addrlen); + logmsg(LOG_INFO, "using %s to connect to servers", + sock_ntop(sstosa(&fixed_proxy_ss))); + freeaddrinfo(res); + } - /* - * We may now drop root privs, as we have done our ioctl for - * pf. If we do drop root, we can't make backchannel connections - * for PORT and EPRT come from port 20, which is not strictly - * RFC compliant. This shouldn't cause problems for all but - * the stupidest ftp clients and the stupidest packet filters. - */ - drop_privs(); + if (fixed_server) { + memset(&hints, 0, sizeof hints); + hints.ai_family = ipv6_mode ? AF_INET6 : AF_INET; + hints.ai_socktype = SOCK_STREAM; + error = getaddrinfo(fixed_server, fixed_server_port, &hints, + &res); + if (error) + errx(1, "getaddrinfo fixed server address failed: %s", + gai_strerror(error)); + memcpy(&fixed_server_ss, res->ai_addr, res->ai_addrlen); + logmsg(LOG_INFO, "using fixed server %s", + sock_ntop(sstosa(&fixed_server_ss))); + freeaddrinfo(res); + } - /* - * We check_host after get_proxy_env so that checks are done - * against the original destination endpoint, not the endpoint - * of our side of the rdr. This allows the use of tcpwrapper - * rules to restrict destinations as well as sources of connections - * for ftp. - */ - if (Use_Rdns) - flags = 0; - else - flags = NI_NUMERICHOST | NI_NUMERICSERV; + /* Setup listener. */ + memset(&hints, 0, sizeof hints); + hints.ai_flags = AI_NUMERICHOST | AI_PASSIVE; + hints.ai_family = ipv6_mode ? 
AF_INET6 : AF_INET; + hints.ai_socktype = SOCK_STREAM; + error = getaddrinfo(listen_ip, listen_port, &hints, &res); + if (error) + errx(1, "getaddrinfo listen address failed: %s", + gai_strerror(error)); + if ((listenfd = socket(res->ai_family, SOCK_STREAM, IPPROTO_TCP)) == -1) + errx(1, "socket failed"); + on = 1; + if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, + sizeof on) != 0) + err(1, "setsockopt failed"); + if (bind(listenfd, (struct sockaddr *)res->ai_addr, + (socklen_t)res->ai_addrlen) != 0) + err(1, "bind failed"); + if (listen(listenfd, TCP_BACKLOG) != 0) + err(1, "listen failed"); + freeaddrinfo(res); + + /* Initialize pf. */ + init_filter(qname, verbose); + + if (daemonize) { + if (daemon(0, 0) == -1) + err(1, "cannot daemonize"); + openlog(__progname, LOG_PID | LOG_NDELAY, LOG_DAEMON); + } - i = getnameinfo((struct sockaddr *)&client_iob.sa, - sizeof(client_iob.sa), ClientName, sizeof(ClientName), NULL, 0, - flags); + /* Use logmsg for output from here on. */ - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) { - debuglog(2, "name resolution failure (client)"); - exit(EX_OSERR); + if (!drop_privs()) { + logmsg(LOG_ERR, "cannot drop privileges: %s", strerror(errno)); + exit(1); } + + event_init(); - i = getnameinfo((struct sockaddr *)&real_server_sa, - sizeof(real_server_sa), RealServerName, sizeof(RealServerName), - NULL, 0, flags); + /* Setup signal handler. 
*/ + signal(SIGPIPE, SIG_IGN); + signal_set(&ev_sighup, SIGHUP, handle_signal, NULL); + signal_set(&ev_sigint, SIGINT, handle_signal, NULL); + signal_set(&ev_sigterm, SIGTERM, handle_signal, NULL); + signal_add(&ev_sighup, NULL); + signal_add(&ev_sigint, NULL); + signal_add(&ev_sigterm, NULL); - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) { - debuglog(2, "name resolution failure (server)"); - exit(EX_OSERR); - } + event_set(&ev, listenfd, EV_READ | EV_PERSIST, handle_connection, &ev); + event_add(&ev, NULL); -#ifdef LIBWRAP - if (use_tcpwrapper && !check_host(&client_iob.sa, &real_server_sa)) - exit(EX_NOPERM); -#endif + logmsg(LOG_NOTICE, "listening on %s port %s", listen_ip, listen_port); - client_iob.fd = 0; + /* Vroom, vroom. */ + event_dispatch(); - syslog(LOG_INFO, "accepted connection from %s:%u to %s:%u", ClientName, - ntohs(client_iob.sa.sin_port), RealServerName, - ntohs(real_server_sa.sin_port)); + logmsg(LOG_ERR, "event_dispatch error: %s", strerror(errno)); + exit_daemon(); - server_iob.fd = get_backchannel_socket(SOCK_STREAM, min_port, max_port, - -1, 1, &server_iob.sa); + /* NOTREACHED */ + return (1); +} - if (connect(server_iob.fd, (struct sockaddr *)&real_server_sa, - sizeof(real_server_sa)) != 0) { - syslog(LOG_INFO, "cannot connect to %s:%u (%m)", RealServerName, - ntohs(real_server_sa.sin_port)); - exit(EX_NOHOST); +u_int16_t +parse_port(int mode) +{ + unsigned int port, v[6]; + int n; + char *p; + + /* Find the last space or left-parenthesis. 
*/ + for (p = linebuf + linelen; p > linebuf; p--) + if (*p == ' ' || *p == '(') + break; + if (p == linebuf) + return (0); + + switch (mode) { + case CMD_PORT: + n = sscanf(p, " %u,%u,%u,%u,%u,%u", &v[0], &v[1], &v[2], + &v[3], &v[4], &v[5]); + if (n == 6 && v[0] < 256 && v[1] < 256 && v[2] < 256 && + v[3] < 256 && v[4] < 256 && v[5] < 256) + return ((v[4] << 8) | v[5]); + break; + case CMD_PASV: + n = sscanf(p, "(%u,%u,%u,%u,%u,%u)", &v[0], &v[1], &v[2], + &v[3], &v[4], &v[5]); + if (n == 6 && v[0] < 256 && v[1] < 256 && v[2] < 256 && + v[3] < 256 && v[4] < 256 && v[5] < 256) + return ((v[4] << 8) | v[5]); + break; + case CMD_EPSV: + n = sscanf(p, "(|||%u|)", &port); + if (n == 1 && port < 65536) + return (port); + break; + case CMD_EPRT: + n = sscanf(p, " |1|%u.%u.%u.%u|%u|", &v[0], &v[1], &v[2], + &v[3], &port); + if (n == 5 && v[0] < 256 && v[1] < 256 && v[2] < 256 && + v[3] < 256 && port < 65536) + return (port); + n = sscanf(p, " |2|%*[a-fA-F0-9:]|%u|", &port); + if (n == 1 && port < 65536) + return (port); + break; + default: + return (0); } - /* - * Now that we are connected to the real server, get the name - * of our end of the server socket so we know our IP address - * from the real server's perspective. - */ - salen = sizeof(server_iob.sa); - getsockname(server_iob.fd, (struct sockaddr *)&server_iob.sa, &salen); + return (0); +} - i = getnameinfo((struct sockaddr *)&server_iob.sa, - sizeof(server_iob.sa), OurName, sizeof(OurName), NULL, 0, flags); +u_int16_t +pick_proxy_port(void) +{ + /* Random should be good enough for avoiding port collisions. 
*/ + return (IPPORT_HIFIRSTAUTO + (arc4random() % + (IPPORT_HILASTAUTO - IPPORT_HIFIRSTAUTO))); +} - if (i != 0 && i != EAI_NONAME && i != EAI_AGAIN) { - debuglog(2, "name resolution failure (local)"); - exit(EX_OSERR); +void +proxy_reply(int cmd, struct sockaddr *sa, u_int16_t port) +{ + int i, r; + + switch (cmd) { + case CMD_PORT: + r = snprintf(linebuf, sizeof linebuf, + "PORT %s,%u,%u\r\n", sock_ntop(sa), port / 256, + port % 256); + break; + case CMD_PASV: + r = snprintf(linebuf, sizeof linebuf, + "227 Entering Passive Mode (%s,%u,%u)\r\n", sock_ntop(sa), + port / 256, port % 256); + break; + case CMD_EPRT: + if (sa->sa_family == AF_INET) + r = snprintf(linebuf, sizeof linebuf, + "EPRT |1|%s|%u|\r\n", sock_ntop(sa), port); + else if (sa->sa_family == AF_INET6) + r = snprintf(linebuf, sizeof linebuf, + "EPRT |2|%s|%u|\r\n", sock_ntop(sa), port); + break; + case CMD_EPSV: + r = snprintf(linebuf, sizeof linebuf, + "229 Entering Extended Passive Mode (|||%u|)\r\n", port); + break; } - debuglog(1, "local socket is %s:%u", OurName, - ntohs(server_iob.sa.sin_port)); - - /* ignore SIGPIPE */ - bzero(&new_sa, sizeof(new_sa)); - new_sa.sa_handler = SIG_IGN; - (void)sigemptyset(&new_sa.sa_mask); - new_sa.sa_flags = SA_RESTART; - if (sigaction(SIGPIPE, &new_sa, &old_sa) != 0) { - syslog(LOG_ERR, "sigaction() failed (%m)"); - exit(EX_OSERR); + if (r < 0 || r >= sizeof linebuf) { + logmsg(LOG_ERR, "proxy_reply failed: %d", r); + linebuf[0] = '\0'; + linelen = 0; + return; } + linelen = (size_t)r; - if (setsockopt(client_iob.fd, SOL_SOCKET, SO_OOBINLINE, (char *)&one, - sizeof(one)) == -1) { - syslog(LOG_NOTICE, "cannot set SO_OOBINLINE (%m)"); - exit(EX_OSERR); + if (cmd == CMD_PORT || cmd == CMD_PASV) { + /* Replace dots in IP address with commas. 
*/ + for (i = 0; i < linelen; i++) + if (linebuf[i] == '.') + linebuf[i] = ','; } +} - client_iob.line_buffer_size = STARTBUFSIZE; - client_iob.line_buffer = malloc(client_iob.line_buffer_size); - client_iob.io_buffer_size = STARTBUFSIZE; - client_iob.io_buffer = malloc(client_iob.io_buffer_size); - client_iob.next_byte = 0; - client_iob.io_buffer_len = 0; - client_iob.alive = 1; - client_iob.who = "client"; - client_iob.send_oob_flags = 0; - client_iob.real_sa = client_iob.sa; - - server_iob.line_buffer_size = STARTBUFSIZE; - server_iob.line_buffer = malloc(server_iob.line_buffer_size); - server_iob.io_buffer_size = STARTBUFSIZE; - server_iob.io_buffer = malloc(server_iob.io_buffer_size); - server_iob.next_byte = 0; - server_iob.io_buffer_len = 0; - server_iob.alive = 1; - server_iob.who = "server"; - server_iob.send_oob_flags = MSG_OOB; - server_iob.real_sa = real_server_sa; - - if (client_iob.line_buffer == NULL || client_iob.io_buffer == NULL || - server_iob.line_buffer == NULL || server_iob.io_buffer == NULL) { - syslog (LOG_NOTICE, "insufficient memory"); - exit(EX_UNAVAILABLE); - } +void +server_error(struct bufferevent *bufev, short what, void *arg) +{ + struct session *s = arg; + + if (what & EVBUFFER_EOF) + logmsg(LOG_INFO, "#%d server close", s->id); + else if (what == (EVBUFFER_ERROR | EVBUFFER_READ)) + logmsg(LOG_ERR, "#%d server refused connection", s->id); + else if (what & EVBUFFER_WRITE) + logmsg(LOG_ERR, "#%d server write error: %d", s->id, what); + else if (what & EVBUFFER_TIMEOUT) + logmsg(LOG_NOTICE, "#%d server timeout", s->id); + else + logmsg(LOG_ERR, "#%d abnormal server error: %d", s->id, what); + + end_session(s); +} - while (client_iob.alive || server_iob.alive) { - int maxfd = 0; - fd_set *fdsp; - - if (client_iob.fd > maxfd) - maxfd = client_iob.fd; - if (client_listen_socket > maxfd) - maxfd = client_listen_socket; - if (client_data_socket > maxfd) - maxfd = client_data_socket; - if (server_iob.fd > maxfd) - maxfd = server_iob.fd; - 
if (server_listen_socket > maxfd) - maxfd = server_listen_socket; - if (server_data_socket > maxfd) - maxfd = server_data_socket; - - debuglog(3, "client is %s; server is %s", - client_iob.alive ? "alive" : "dead", - server_iob.alive ? "alive" : "dead"); - - fdsp = (fd_set *)calloc(howmany(maxfd + 1, NFDBITS), - sizeof(fd_mask)); - if (fdsp == NULL) { - syslog(LOG_NOTICE, "insufficient memory"); - exit(EX_UNAVAILABLE); +int +server_parse(struct session *s) +{ + struct sockaddr *client_sa, *orig_sa, *proxy_sa, *server_sa; + int prepared = 0; + + if (s->cmd == CMD_NONE || linelen < 4 || linebuf[0] != '2') + goto out; + + /* + * The pf rules below do quite some NAT rewriting, to keep up + * appearances. Points to keep in mind: + * 1) The client must think it's talking to the real server, + * for both control and data connections. Transparently. + * 2) The server must think that the proxy is the client. + * 3) Source and destination ports are rewritten to minimize + * port collisions, to aid security (some systems pick weak + * ports) or to satisfy RFC requirements (source port 20). + */ + + /* Cast this once, to make code below it more readable. */ + client_sa = sstosa(&s->client_ss); + server_sa = sstosa(&s->server_ss); + proxy_sa = sstosa(&s->proxy_ss); + if (fixed_server) + /* Fixed server: data connections must appear to come + from / go to the original server, not the fixed one. */ + orig_sa = sstosa(&s->orig_server_ss); + else + /* Server not fixed: orig_server == server. */ + orig_sa = sstosa(&s->server_ss); + + /* Passive modes. 
*/ + if ((s->cmd == CMD_PASV && strncmp("227 ", linebuf, 4) == 0) || + (s->cmd == CMD_EPSV && strncmp("229 ", linebuf, 4) == 0)) { + s->port = parse_port(s->cmd); + if (s->port < MIN_PORT) { + logmsg(LOG_CRIT, "#%d bad port in '%s'", s->id, + linebuf); + return (0); } + s->proxy_port = pick_proxy_port(); + logmsg(LOG_INFO, "#%d passive: client to server port %d" + " via port %d", s->id, s->port, s->proxy_port); + + if (prepare_commit(s->id) == -1) + goto fail; + prepared = 1; + + proxy_reply(s->cmd, orig_sa, s->proxy_port); + logmsg(LOG_DEBUG, "#%d proxy: %s", s->id, linebuf); + + /* rdr from $client to $orig_server port $proxy_port -> $server + port $port */ + if (add_rdr(s->id, client_sa, orig_sa, s->proxy_port, + server_sa, s->port) == -1) + goto fail; + + /* nat from $client to $server port $port -> $proxy */ + if (add_nat(s->id, client_sa, server_sa, s->port, proxy_sa, + PF_NAT_PROXY_PORT_LOW, PF_NAT_PROXY_PORT_HIGH) == -1) + goto fail; + + /* pass in from $client to $server port $port */ + if (add_filter(s->id, PF_IN, client_sa, server_sa, + s->port) == -1) + goto fail; + + /* pass out from $proxy to $server port $port */ + if (add_filter(s->id, PF_OUT, proxy_sa, server_sa, + s->port) == -1) + goto fail; + } - if (client_iob.alive && telnet_getline(&client_iob, - &server_iob)) { - debuglog(3, "client line buffer is \"%s\"", - (char *)client_iob.line_buffer); - if (client_iob.line_buffer[0] != '\0') - do_client_cmd(&client_iob, &server_iob); - } else if (server_iob.alive && telnet_getline(&server_iob, - &client_iob)) { - debuglog(3, "server line buffer is \"%s\"", - (char *)server_iob.line_buffer); - if (server_iob.line_buffer[0] != '\0') - do_server_reply(&server_iob, &client_iob); + /* Active modes. 
*/ + if ((s->cmd == CMD_PORT || s->cmd == CMD_EPRT) && + strncmp("200 ", linebuf, 4) == 0) { + logmsg(LOG_INFO, "#%d active: server to client port %d" + " via port %d", s->id, s->port, s->proxy_port); + + if (prepare_commit(s->id) == -1) + goto fail; + prepared = 1; + + /* rdr from $server to $proxy port $proxy_port -> $client port + $port */ + if (add_rdr(s->id, server_sa, proxy_sa, s->proxy_port, + client_sa, s->port) == -1) + goto fail; + + /* nat from $server to $client port $port -> $orig_server port + $natport */ + if (rfc_mode && s->cmd == CMD_PORT) { + /* Rewrite sourceport to RFC mandated 20. */ + if (add_nat(s->id, server_sa, client_sa, s->port, + orig_sa, 20, 20) == -1) + goto fail; } else { - if (client_iob.alive) { - FD_SET(client_iob.fd, fdsp); - if (client_listen_socket >= 0) - FD_SET(client_listen_socket, fdsp); - if (client_data_socket >= 0) - FD_SET(client_data_socket, fdsp); - } - if (server_iob.alive) { - FD_SET(server_iob.fd, fdsp); - if (server_listen_socket >= 0) - FD_SET(server_listen_socket, fdsp); - if (server_data_socket >= 0) - FD_SET(server_data_socket, fdsp); - } - tv.tv_sec = timeout_seconds; - tv.tv_usec = 0; - - doselect: - sval = select(maxfd + 1, fdsp, NULL, NULL, - (tv.tv_sec == 0) ? NULL : &tv); - if (sval == 0) { - /* - * This proxy has timed out. Expire it - * quietly with an obituary in the syslogs - * for any passing mourners. 
- */ - syslog(LOG_INFO, - "timeout: no data for %ld seconds", - timeout_seconds); - exit(EX_OK); - } - if (sval == -1) { - if (errno == EINTR || errno == EAGAIN) - goto doselect; - syslog(LOG_NOTICE, - "select() failed (%m)"); - exit(EX_OSERR); - } - if (client_data_socket >= 0 && - FD_ISSET(client_data_socket, fdsp)) { - int rval; - - debuglog(3, "transfer: client to server"); - rval = xfer_data("client to server", - client_data_socket, - server_data_socket, - client_iob.sa.sin_addr, - real_server_sa.sin_addr); - if (rval <= 0) { - close_client_data(); - close_server_data(); - show_xfer_stats(); - } else - client_data_bytes += rval; - } - if (server_data_socket >= 0 && - FD_ISSET(server_data_socket, fdsp)) { - int rval; - - debuglog(3, "transfer: server to client"); - rval = xfer_data("server to client", - server_data_socket, - client_data_socket, - real_server_sa.sin_addr, - client_iob.sa.sin_addr); - if (rval <= 0) { - close_client_data(); - close_server_data(); - show_xfer_stats(); - } else - server_data_bytes += rval; - } - if (server_listen_socket >= 0 && - FD_ISSET(server_listen_socket, fdsp)) { - connect_port_backchannel(); - } - if (client_listen_socket >= 0 && - FD_ISSET(client_listen_socket, fdsp)) { - connect_pasv_backchannel(); - } - if (client_iob.alive && - FD_ISSET(client_iob.fd, fdsp)) { - client_iob.data_available = 1; - } - if (server_iob.alive && - FD_ISSET(server_iob.fd, fdsp)) { - server_iob.data_available = 1; - } + /* Let pf pick a source port from the standard range. 
*/ + if (add_nat(s->id, server_sa, client_sa, s->port, + orig_sa, PF_NAT_PROXY_PORT_LOW, + PF_NAT_PROXY_PORT_HIGH) == -1) + goto fail; } - free(fdsp); - if (client_iob.got_eof) { - shutdown(server_iob.fd, 1); - shutdown(client_iob.fd, 0); - client_iob.got_eof = 0; - client_iob.alive = 0; + + /* pass in from $server to $client port $port */ + if (add_filter(s->id, PF_IN, server_sa, client_sa, s->port) == + -1) + goto fail; + + /* pass out from $orig_server to $client port $port */ + if (add_filter(s->id, PF_OUT, orig_sa, client_sa, s->port) == + -1) + goto fail; + } + + /* Commit rules if they were prepared. */ + if (prepared && (do_commit() == -1)) { + if (errno != EBUSY) + goto fail; + /* One more try if busy. */ + usleep(5000); + if (do_commit() == -1) + goto fail; + } + + out: + s->cmd = CMD_NONE; + s->port = 0; + + return (1); + + fail: + logmsg(LOG_CRIT, "#%d pf operation failed: %s", s->id, strerror(errno)); + if (prepared) + do_rollback(); + return (0); +} + +void +server_read(struct bufferevent *bufev, void *arg) +{ + struct session *s = arg; + size_t buf_avail, read; + int n; + + bufferevent_settimeout(bufev, timeout, 0); + + do { + buf_avail = sizeof s->sbuf - s->sbuf_valid; + read = bufferevent_read(bufev, s->sbuf + s->sbuf_valid, + buf_avail); + s->sbuf_valid += read; + + while ((n = getline(s->sbuf, &s->sbuf_valid)) > 0) { + logmsg(LOG_DEBUG, "#%d server: %s", s->id, linebuf); + if (!server_parse(s)) { + end_session(s); + return; + } + bufferevent_write(s->client_bufev, linebuf, linelen); } - if (server_iob.got_eof) { - shutdown(client_iob.fd, 1); - shutdown(server_iob.fd, 0); - server_iob.got_eof = 0; - server_iob.alive = 0; + + if (n == -1) { + logmsg(LOG_ERR, "#%d server reply too long or not" + " clean", s->id); + end_session(s); + return; } + } while (read == buf_avail); +} + +const char * +sock_ntop(struct sockaddr *sa) +{ + static int n = 0; + + /* Cycle to next buffer. 
*/ + n = (n + 1) % NTOP_BUFS; + ntop_buf[n][0] = '\0'; + + if (sa->sa_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + + return (inet_ntop(AF_INET, &sin->sin_addr, ntop_buf[n], + sizeof ntop_buf[0])); + } + + if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + + return (inet_ntop(AF_INET6, &sin6->sin6_addr, ntop_buf[n], + sizeof ntop_buf[0])); } - if (Verbose) - syslog(LOG_INFO, "session ended"); + return (NULL); +} - exit(EX_OK); +void +usage(void) +{ + fprintf(stderr, "usage: %s [-6Adrv] [-a address] [-b address]" + " [-D level] [-m maxsessions]\n [-P port]" + " [-p port] [-q queue] [-R address] [-t timeout]\n", __progname); + exit(1); } diff --git a/contrib/pf/libevent/buffer.c b/contrib/pf/libevent/buffer.c new file mode 100644 index 0000000..77efd0c --- /dev/null +++ b/contrib/pf/libevent/buffer.c @@ -0,0 +1,456 @@ +/* + * Copyright (c) 2002, 2003 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_VASPRINTF +/* If we have vasprintf, we need to define this before we include stdio.h. */ +#define _GNU_SOURCE +#endif + +#include <sys/types.h> + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif + +#ifdef HAVE_SYS_IOCTL_H +#include <sys/ioctl.h> +#endif + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifdef HAVE_STDARG_H +#include <stdarg.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include "event.h" + +struct evbuffer * +evbuffer_new(void) +{ + struct evbuffer *buffer; + + buffer = calloc(1, sizeof(struct evbuffer)); + + return (buffer); +} + +void +evbuffer_free(struct evbuffer *buffer) +{ + if (buffer->orig_buffer != NULL) + free(buffer->orig_buffer); + free(buffer); +} + +/* + * This is a destructive add. The data from one buffer moves into + * the other buffer. 
+ */ + +#define SWAP(x,y) do { \ + (x)->buffer = (y)->buffer; \ + (x)->orig_buffer = (y)->orig_buffer; \ + (x)->misalign = (y)->misalign; \ + (x)->totallen = (y)->totallen; \ + (x)->off = (y)->off; \ +} while (0) + +int +evbuffer_add_buffer(struct evbuffer *outbuf, struct evbuffer *inbuf) +{ + int res; + + /* Short cut for better performance */ + if (outbuf->off == 0) { + struct evbuffer tmp; + size_t oldoff = inbuf->off; + + /* Swap them directly */ + SWAP(&tmp, outbuf); + SWAP(outbuf, inbuf); + SWAP(inbuf, &tmp); + + /* + * Optimization comes with a price; we need to notify the + * buffer if necessary of the changes. oldoff is the amount + * of data that we tranfered from inbuf to outbuf + */ + if (inbuf->off != oldoff && inbuf->cb != NULL) + (*inbuf->cb)(inbuf, oldoff, inbuf->off, inbuf->cbarg); + if (oldoff && outbuf->cb != NULL) + (*outbuf->cb)(outbuf, 0, oldoff, outbuf->cbarg); + + return (0); + } + + res = evbuffer_add(outbuf, inbuf->buffer, inbuf->off); + if (res == 0) { + /* We drain the input buffer on success */ + evbuffer_drain(inbuf, inbuf->off); + } + + return (res); +} + +int +evbuffer_add_vprintf(struct evbuffer *buf, const char *fmt, va_list ap) +{ + char *buffer; + size_t space; + size_t oldoff = buf->off; + int sz; + va_list aq; + + for (;;) { + buffer = (char *)buf->buffer + buf->off; + space = buf->totallen - buf->misalign - buf->off; + +#ifndef va_copy +#define va_copy(dst, src) memcpy(&(dst), &(src), sizeof(va_list)) +#endif + va_copy(aq, ap); + +#ifdef WIN32 + sz = vsnprintf(buffer, space - 1, fmt, aq); + buffer[space - 1] = '\0'; +#else + sz = vsnprintf(buffer, space, fmt, aq); +#endif + + va_end(aq); + + if (sz == -1) + return (-1); + if (sz < space) { + buf->off += sz; + if (buf->cb != NULL) + (*buf->cb)(buf, oldoff, buf->off, buf->cbarg); + return (sz); + } + if (evbuffer_expand(buf, sz + 1) == -1) + return (-1); + + } + /* NOTREACHED */ +} + +int +evbuffer_add_printf(struct evbuffer *buf, const char *fmt, ...) 
+{ + int res = -1; + va_list ap; + + va_start(ap, fmt); + res = evbuffer_add_vprintf(buf, fmt, ap); + va_end(ap); + + return (res); +} + +/* Reads data from an event buffer and drains the bytes read */ + +int +evbuffer_remove(struct evbuffer *buf, void *data, size_t datlen) +{ + size_t nread = datlen; + if (nread >= buf->off) + nread = buf->off; + + memcpy(data, buf->buffer, nread); + evbuffer_drain(buf, nread); + + return (nread); +} + +/* + * Reads a line terminated by either '\r\n', '\n\r' or '\r' or '\n'. + * The returned buffer needs to be freed by the called. + */ + +char * +evbuffer_readline(struct evbuffer *buffer) +{ + u_char *data = EVBUFFER_DATA(buffer); + size_t len = EVBUFFER_LENGTH(buffer); + char *line; + unsigned int i; + + for (i = 0; i < len; i++) { + if (data[i] == '\r' || data[i] == '\n') + break; + } + + if (i == len) + return (NULL); + + if ((line = malloc(i + 1)) == NULL) { + fprintf(stderr, "%s: out of memory\n", __func__); + evbuffer_drain(buffer, i); + return (NULL); + } + + memcpy(line, data, i); + line[i] = '\0'; + + /* + * Some protocols terminate a line with '\r\n', so check for + * that, too. + */ + if ( i < len - 1 ) { + char fch = data[i], sch = data[i+1]; + + /* Drain one more character if needed */ + if ( (sch == '\r' || sch == '\n') && sch != fch ) + i += 1; + } + + evbuffer_drain(buffer, i + 1); + + return (line); +} + +/* Adds data to an event buffer */ + +static inline void +evbuffer_align(struct evbuffer *buf) +{ + memmove(buf->orig_buffer, buf->buffer, buf->off); + buf->buffer = buf->orig_buffer; + buf->misalign = 0; +} + +/* Expands the available space in the event buffer to at least datlen */ + +int +evbuffer_expand(struct evbuffer *buf, size_t datlen) +{ + size_t need = buf->misalign + buf->off + datlen; + + /* If we can fit all the data, then we don't have to do anything */ + if (buf->totallen >= need) + return (0); + + /* + * If the misalignment fulfills our data needs, we just force an + * alignment to happen. 
Afterwards, we have enough space. + */ + if (buf->misalign >= datlen) { + evbuffer_align(buf); + } else { + void *newbuf; + size_t length = buf->totallen; + + if (length < 256) + length = 256; + while (length < need) + length <<= 1; + + if (buf->orig_buffer != buf->buffer) + evbuffer_align(buf); + if ((newbuf = realloc(buf->buffer, length)) == NULL) + return (-1); + + buf->orig_buffer = buf->buffer = newbuf; + buf->totallen = length; + } + + return (0); +} + +int +evbuffer_add(struct evbuffer *buf, const void *data, size_t datlen) +{ + size_t need = buf->misalign + buf->off + datlen; + size_t oldoff = buf->off; + + if (buf->totallen < need) { + if (evbuffer_expand(buf, datlen) == -1) + return (-1); + } + + memcpy(buf->buffer + buf->off, data, datlen); + buf->off += datlen; + + if (datlen && buf->cb != NULL) + (*buf->cb)(buf, oldoff, buf->off, buf->cbarg); + + return (0); +} + +void +evbuffer_drain(struct evbuffer *buf, size_t len) +{ + size_t oldoff = buf->off; + + if (len >= buf->off) { + buf->off = 0; + buf->buffer = buf->orig_buffer; + buf->misalign = 0; + goto done; + } + + buf->buffer += len; + buf->misalign += len; + + buf->off -= len; + + done: + /* Tell someone about changes in this buffer */ + if (buf->off != oldoff && buf->cb != NULL) + (*buf->cb)(buf, oldoff, buf->off, buf->cbarg); + +} + +/* + * Reads data from a file descriptor into a buffer. + */ + +#define EVBUFFER_MAX_READ 4096 + +int +evbuffer_read(struct evbuffer *buf, int fd, int howmuch) +{ + u_char *p; + size_t oldoff = buf->off; + int n = EVBUFFER_MAX_READ; +#ifdef WIN32 + DWORD dwBytesRead; +#endif + +#ifdef FIONREAD + if (ioctl(fd, FIONREAD, &n) == -1 || n == 0) { + n = EVBUFFER_MAX_READ; + } else if (n > EVBUFFER_MAX_READ && n > howmuch) { + /* + * It's possible that a lot of data is available for + * reading. We do not want to exhaust resources + * before the reader has a chance to do something + * about it. 
If the reader does not tell us how much + * data we should read, we artifically limit it. + */ + if (n > buf->totallen << 2) + n = buf->totallen << 2; + if (n < EVBUFFER_MAX_READ) + n = EVBUFFER_MAX_READ; + } +#endif + if (howmuch < 0 || howmuch > n) + howmuch = n; + + /* If we don't have FIONREAD, we might waste some space here */ + if (evbuffer_expand(buf, howmuch) == -1) + return (-1); + + /* We can append new data at this point */ + p = buf->buffer + buf->off; + +#ifndef WIN32 + n = read(fd, p, howmuch); + if (n == -1) + return (-1); + if (n == 0) + return (0); +#else + n = ReadFile((HANDLE)fd, p, howmuch, &dwBytesRead, NULL); + if (n == 0) + return (-1); + if (dwBytesRead == 0) + return (0); + n = dwBytesRead; +#endif + + buf->off += n; + + /* Tell someone about changes in this buffer */ + if (buf->off != oldoff && buf->cb != NULL) + (*buf->cb)(buf, oldoff, buf->off, buf->cbarg); + + return (n); +} + +int +evbuffer_write(struct evbuffer *buffer, int fd) +{ + int n; +#ifdef WIN32 + DWORD dwBytesWritten; +#endif + +#ifndef WIN32 + n = write(fd, buffer->buffer, buffer->off); + if (n == -1) + return (-1); + if (n == 0) + return (0); +#else + n = WriteFile((HANDLE)fd, buffer->buffer, buffer->off, &dwBytesWritten, NULL); + if (n == 0) + return (-1); + if (dwBytesWritten == 0) + return (0); + n = dwBytesWritten; +#endif + evbuffer_drain(buffer, n); + + return (n); +} + +u_char * +evbuffer_find(struct evbuffer *buffer, const u_char *what, size_t len) +{ + size_t remain = buffer->off; + u_char *search = buffer->buffer; + u_char *p; + + while ((p = memchr(search, *what, remain)) != NULL) { + remain = buffer->off - (size_t)(search - buffer->buffer); + if (remain < len) + break; + if (memcmp(p, what, len) == 0) + return (p); + search = p + 1; + } + + return (NULL); +} + +void evbuffer_setcb(struct evbuffer *buffer, + void (*cb)(struct evbuffer *, size_t, size_t, void *), + void *cbarg) +{ + buffer->cb = cb; + buffer->cbarg = cbarg; +} diff --git 
a/contrib/pf/libevent/evbuffer.c b/contrib/pf/libevent/evbuffer.c new file mode 100644 index 0000000..52712bc --- /dev/null +++ b/contrib/pf/libevent/evbuffer.c @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2002-2004 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/types.h> + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifdef HAVE_STDARG_H +#include <stdarg.h> +#endif + +#include "event.h" + +/* prototypes */ + +void bufferevent_setwatermark(struct bufferevent *, short, size_t, size_t); +void bufferevent_read_pressure_cb(struct evbuffer *, size_t, size_t, void *); + +static int +bufferevent_add(struct event *ev, int timeout) +{ + struct timeval tv, *ptv = NULL; + + if (timeout) { + timerclear(&tv); + tv.tv_sec = timeout; + ptv = &tv; + } + + return (event_add(ev, ptv)); +} + +/* + * This callback is executed when the size of the input buffer changes. + * We use it to apply back pressure on the reading side. + */ + +void +bufferevent_read_pressure_cb(struct evbuffer *buf, size_t old, size_t now, + void *arg) { + struct bufferevent *bufev = arg; + /* + * If we are below the watermark then reschedule reading if it's + * still enabled. + */ + if (bufev->wm_read.high == 0 || now < bufev->wm_read.high) { + evbuffer_setcb(buf, NULL, NULL); + + if (bufev->enabled & EV_READ) + bufferevent_add(&bufev->ev_read, bufev->timeout_read); + } +} + +static void +bufferevent_readcb(int fd, short event, void *arg) +{ + struct bufferevent *bufev = arg; + int res = 0; + short what = EVBUFFER_READ; + size_t len; + int howmuch = -1; + + if (event == EV_TIMEOUT) { + what |= EVBUFFER_TIMEOUT; + goto error; + } + + /* + * If we have a high watermark configured then we don't want to + * read more data than would make us reach the watermark. 
+ */ + if (bufev->wm_read.high != 0) + howmuch = bufev->wm_read.high; + + res = evbuffer_read(bufev->input, fd, howmuch); + if (res == -1) { + if (errno == EAGAIN || errno == EINTR) + goto reschedule; + /* error case */ + what |= EVBUFFER_ERROR; + } else if (res == 0) { + /* eof case */ + what |= EVBUFFER_EOF; + } + + if (res <= 0) + goto error; + + bufferevent_add(&bufev->ev_read, bufev->timeout_read); + + /* See if this callbacks meets the water marks */ + len = EVBUFFER_LENGTH(bufev->input); + if (bufev->wm_read.low != 0 && len < bufev->wm_read.low) + return; + if (bufev->wm_read.high != 0 && len > bufev->wm_read.high) { + struct evbuffer *buf = bufev->input; + event_del(&bufev->ev_read); + + /* Now schedule a callback for us */ + evbuffer_setcb(buf, bufferevent_read_pressure_cb, bufev); + return; + } + + /* Invoke the user callback - must always be called last */ + if (bufev->readcb != NULL) + (*bufev->readcb)(bufev, bufev->cbarg); + return; + + reschedule: + bufferevent_add(&bufev->ev_read, bufev->timeout_read); + return; + + error: + (*bufev->errorcb)(bufev, what, bufev->cbarg); +} + +static void +bufferevent_writecb(int fd, short event, void *arg) +{ + struct bufferevent *bufev = arg; + int res = 0; + short what = EVBUFFER_WRITE; + + if (event == EV_TIMEOUT) { + what |= EVBUFFER_TIMEOUT; + goto error; + } + + if (EVBUFFER_LENGTH(bufev->output)) { + res = evbuffer_write(bufev->output, fd); + if (res == -1) { +#ifndef WIN32 +/*todo. evbuffer uses WriteFile when WIN32 is set. WIN32 system calls do not + *set errno. 
thus this error checking is not portable*/ + if (errno == EAGAIN || + errno == EINTR || + errno == EINPROGRESS) + goto reschedule; + /* error case */ + what |= EVBUFFER_ERROR; + +#else + goto reschedule; +#endif + + } else if (res == 0) { + /* eof case */ + what |= EVBUFFER_EOF; + } + if (res <= 0) + goto error; + } + + if (EVBUFFER_LENGTH(bufev->output) != 0) + bufferevent_add(&bufev->ev_write, bufev->timeout_write); + + /* + * Invoke the user callback if our buffer is drained or below the + * low watermark. + */ + if (bufev->writecb != NULL && + EVBUFFER_LENGTH(bufev->output) <= bufev->wm_write.low) + (*bufev->writecb)(bufev, bufev->cbarg); + + return; + + reschedule: + if (EVBUFFER_LENGTH(bufev->output) != 0) + bufferevent_add(&bufev->ev_write, bufev->timeout_write); + return; + + error: + (*bufev->errorcb)(bufev, what, bufev->cbarg); +} + +/* + * Create a new buffered event object. + * + * The read callback is invoked whenever we read new data. + * The write callback is invoked whenever the output buffer is drained. + * The error callback is invoked on a write/read error or on EOF. + * + * Both read and write callbacks maybe NULL. The error callback is not + * allowed to be NULL and have to be provided always. 
+ */ + +struct bufferevent * +bufferevent_new(int fd, evbuffercb readcb, evbuffercb writecb, + everrorcb errorcb, void *cbarg) +{ + struct bufferevent *bufev; + + if ((bufev = calloc(1, sizeof(struct bufferevent))) == NULL) + return (NULL); + + if ((bufev->input = evbuffer_new()) == NULL) { + free(bufev); + return (NULL); + } + + if ((bufev->output = evbuffer_new()) == NULL) { + evbuffer_free(bufev->input); + free(bufev); + return (NULL); + } + + event_set(&bufev->ev_read, fd, EV_READ, bufferevent_readcb, bufev); + event_set(&bufev->ev_write, fd, EV_WRITE, bufferevent_writecb, bufev); + + bufev->readcb = readcb; + bufev->writecb = writecb; + bufev->errorcb = errorcb; + + bufev->cbarg = cbarg; + + /* + * Set to EV_WRITE so that using bufferevent_write is going to + * trigger a callback. Reading needs to be explicitly enabled + * because otherwise no data will be available. + */ + bufev->enabled = EV_WRITE; + + return (bufev); +} + +int +bufferevent_priority_set(struct bufferevent *bufev, int priority) +{ + if (event_priority_set(&bufev->ev_read, priority) == -1) + return (-1); + if (event_priority_set(&bufev->ev_write, priority) == -1) + return (-1); + + return (0); +} + +/* Closing the file descriptor is the responsibility of the caller */ + +void +bufferevent_free(struct bufferevent *bufev) +{ + event_del(&bufev->ev_read); + event_del(&bufev->ev_write); + + evbuffer_free(bufev->input); + evbuffer_free(bufev->output); + + free(bufev); +} + +/* + * Returns 0 on success; + * -1 on failure. 
+ */ + +int +bufferevent_write(struct bufferevent *bufev, void *data, size_t size) +{ + int res; + + res = evbuffer_add(bufev->output, data, size); + + if (res == -1) + return (res); + + /* If everything is okay, we need to schedule a write */ + if (size > 0 && (bufev->enabled & EV_WRITE)) + bufferevent_add(&bufev->ev_write, bufev->timeout_write); + + return (res); +} + +int +bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf) +{ + int res; + + res = bufferevent_write(bufev, buf->buffer, buf->off); + if (res != -1) + evbuffer_drain(buf, buf->off); + + return (res); +} + +size_t +bufferevent_read(struct bufferevent *bufev, void *data, size_t size) +{ + struct evbuffer *buf = bufev->input; + + if (buf->off < size) + size = buf->off; + + /* Copy the available data to the user buffer */ + memcpy(data, buf->buffer, size); + + if (size) + evbuffer_drain(buf, size); + + return (size); +} + +int +bufferevent_enable(struct bufferevent *bufev, short event) +{ + if (event & EV_READ) { + if (bufferevent_add(&bufev->ev_read, bufev->timeout_read) == -1) + return (-1); + } + if (event & EV_WRITE) { + if (bufferevent_add(&bufev->ev_write, bufev->timeout_write) == -1) + return (-1); + } + + bufev->enabled |= event; + return (0); +} + +int +bufferevent_disable(struct bufferevent *bufev, short event) +{ + if (event & EV_READ) { + if (event_del(&bufev->ev_read) == -1) + return (-1); + } + if (event & EV_WRITE) { + if (event_del(&bufev->ev_write) == -1) + return (-1); + } + + bufev->enabled &= ~event; + return (0); +} + +/* + * Sets the read and write timeout for a buffered event. 
+ */ + +void +bufferevent_settimeout(struct bufferevent *bufev, + int timeout_read, int timeout_write) { + bufev->timeout_read = timeout_read; + bufev->timeout_write = timeout_write; +} + +/* + * Sets the water marks + */ + +void +bufferevent_setwatermark(struct bufferevent *bufev, short events, + size_t lowmark, size_t highmark) +{ + if (events & EV_READ) { + bufev->wm_read.low = lowmark; + bufev->wm_read.high = highmark; + } + + if (events & EV_WRITE) { + bufev->wm_write.low = lowmark; + bufev->wm_write.high = highmark; + } + + /* If the watermarks changed then see if we should call read again */ + bufferevent_read_pressure_cb(bufev->input, + 0, EVBUFFER_LENGTH(bufev->input), bufev); +} + +int +bufferevent_base_set(struct event_base *base, struct bufferevent *bufev) +{ + int res; + + res = event_base_set(base, &bufev->ev_read); + if (res == -1) + return (res); + + res = event_base_set(base, &bufev->ev_write); + return (res); +} diff --git a/contrib/pf/libevent/event-internal.h b/contrib/pf/libevent/event-internal.h new file mode 100644 index 0000000..becb669 --- /dev/null +++ b/contrib/pf/libevent/event-internal.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EVENT_INTERNAL_H_ +#define _EVENT_INTERNAL_H_ + +#ifdef __cplusplus +extern "C" { +#endif + /* State of one event loop; event_init() allocates it zeroed with calloc() and fills it in. */ +struct event_base { + const struct eventop *evsel; /* backend operations table, chosen from eventops[] by event_init() */ + void *evbase; /* backend state returned by evsel->init(); handed back to evsel->recalc() and evsel->dealloc() */ + int event_count; /* counts number of total events */ + int event_count_active; /* counts number of active events */ + + int event_gotterm; /* Set to terminate loop */ /* set by event_loopexit_cb(), cleared again by event_base_loop() */ + + /* active event management */ + struct event_list **activequeues; /* one list per priority, allocated by event_base_priority_init(); event_process_active() scans from index 0 upward */ + int nactivequeues; /* number of entries in activequeues */ + + struct event_list eventqueue; /* TAILQ initialised by event_init(); event_base_free() asserts it is empty */ + struct timeval event_tv; /* filled in by gettime() in event_init() */ + + RB_HEAD(event_tree, event) timetree; /* red-black tree of events ordered by compare(): ev_timeout first, then address */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _EVENT_INTERNAL_H_ */ diff --git a/contrib/pf/libevent/event.c b/contrib/pf/libevent/event.c new file mode 100644 index 0000000..f6d2b1c --- /dev/null +++ b/contrib/pf/libevent/event.c @@ -0,0 +1,878 @@ +/* + * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#include "misc.h"
+#endif
+#include <sys/types.h>
+#include <sys/tree.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifndef WIN32
+#include <unistd.h>
+#endif
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+
+#include "event.h"
+#include "event-internal.h"
+#include "log.h"
+
+#ifdef HAVE_EVENT_PORTS
+extern const struct eventop evportops;
+#endif
+#ifdef HAVE_SELECT
+extern const struct eventop selectops;
+#endif
+#ifdef HAVE_POLL
+extern const struct eventop pollops;
+#endif
+#ifdef HAVE_RTSIG
+extern const struct eventop rtsigops;
+#endif
+#ifdef HAVE_EPOLL
+extern const struct eventop epollops;
+#endif
+#ifdef HAVE_WORKING_KQUEUE
+extern const struct eventop kqops;
+#endif
+#ifdef HAVE_DEVPOLL
+extern const struct eventop devpollops;
+#endif
+#ifdef WIN32
+extern const struct eventop win32ops;
+#endif
+
+/* In order of preference */
+const struct eventop *eventops[] = {
+#ifdef HAVE_EVENT_PORTS
+	&evportops,
+#endif
+#ifdef HAVE_WORKING_KQUEUE
+	&kqops,
+#endif
+#ifdef HAVE_EPOLL
+	&epollops,
+#endif
+#ifdef HAVE_DEVPOLL
+	&devpollops,
+#endif
+#ifdef HAVE_RTSIG
+	&rtsigops,
+#endif
+#ifdef HAVE_POLL
+	&pollops,
+#endif
+#ifdef HAVE_SELECT
+	&selectops,
+#endif
+#ifdef WIN32
+	&win32ops,
+#endif
+	NULL
+};
+
+/* Global state */
+struct event_list signalqueue;
+
+struct event_base *current_base = NULL;
+
+/* Handle signals - This is a deprecated interface */
+int (*event_sigcb)(void);		/* Signal callback when gotsig is set */
+volatile sig_atomic_t event_gotsig;	/* Set in signal handler */
+
+/* Prototypes */
+static void	event_queue_insert(struct event_base *, struct event *, int);
+static void	event_queue_remove(struct event_base *, struct event *, int);
+static int	event_haveevents(struct event_base *);
+
+static void	event_process_active(struct event_base *);
+
+static int	timeout_next(struct event_base *, struct timeval *);
+static void	timeout_process(struct event_base *);
+static void	timeout_correct(struct event_base *, struct timeval *);
+
+/*
+ * Ordering function for the timeout tree: earlier ev_timeout sorts first,
+ * and events with equal timeouts are ordered by their address so that two
+ * distinct events never compare equal.
+ */
+static int
+compare(struct event *a, struct event *b)
+{
+	if (timercmp(&a->ev_timeout, &b->ev_timeout, <))
+		return (-1);
+	else if (timercmp(&a->ev_timeout, &b->ev_timeout, >))
+		return (1);
+	if (a < b)
+		return (-1);
+	else if (a > b)
+		return (1);
+	return (0);
+}
+
+/*
+ * Stores the current time in *tp, using clock_gettime() (monotonic clock
+ * when available) or gettimeofday() depending on the build configuration.
+ * Returns 0 on success, -1 if clock_gettime() fails.
+ */
+static int
+gettime(struct timeval *tp)
+{
+#ifdef HAVE_CLOCK_GETTIME
+	struct timespec	ts;
+
+#ifdef HAVE_CLOCK_MONOTONIC
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+#else
+	if (clock_gettime(CLOCK_REALTIME, &ts) == -1)
+#endif
+		return (-1);
+	tp->tv_sec = ts.tv_sec;
+	tp->tv_usec = ts.tv_nsec / 1000;
+#else
+	gettimeofday(tp, NULL);
+#endif
+
+	return (0);
+}
+
+RB_PROTOTYPE(event_tree, event, ev_timeout_node, compare);
+
+RB_GENERATE(event_tree, event, ev_timeout_node, compare);
+
+
+/*
+ * Creates the global event base (current_base), selects the first backend
+ * in eventops[] whose init() succeeds and sets up one active queue.
+ * Exits through event_err()/event_errx() if memory or a backend is not
+ * available; otherwise returns the new base.
+ */
+void *
+event_init(void)
+{
+	int i;
+
+	/* The "%s" needs its argument: pass the function name. */
+	if ((current_base = calloc(1, sizeof(struct event_base))) == NULL)
+		event_err(1, "%s: calloc", __func__);
+
+	event_sigcb = NULL;
+	event_gotsig = 0;
+	gettime(&current_base->event_tv);
+
+	RB_INIT(&current_base->timetree);
+	TAILQ_INIT(&current_base->eventqueue);
+	TAILQ_INIT(&signalqueue);
+
+	/* Try the backends in order of preference until one initialises. */
+	current_base->evbase = NULL;
+	for (i = 0; eventops[i] && !current_base->evbase; i++) {
+		current_base->evsel = eventops[i];
+
+		current_base->evbase = current_base->evsel->init();
+	}
+
+	if (current_base->evbase == NULL)
+		event_errx(1, "%s: no event mechanism available", __func__);
+
+	if (getenv("EVENT_SHOW_METHOD"))
+		event_msgx("libevent using: %s\n",
+			   current_base->evsel->name);
+
+	/* allocate a single active event queue */
+	event_base_priority_init(current_base, 1);
+
+	return (current_base);
+}
+
+/*
+ * Frees an event base; NULL means the global current_base.  The base must
+ * hold no events any more (this is asserted).  If the base being freed is
+ * current_base, current_base is reset to NULL.
+ */
+void
+event_base_free(struct event_base *base)
+{
+	int i;
+
+	if (base == NULL && current_base)
+		base = current_base;
+	if (base == current_base)
+		current_base = NULL;
+
+	assert(base);
+	assert(TAILQ_EMPTY(&base->eventqueue));
+	for (i = 0; i < base->nactivequeues; ++i)
+		assert(TAILQ_EMPTY(base->activequeues[i]));
+
+	assert(RB_EMPTY(&base->timetree));
+
+	for (i = 0; i < base->nactivequeues; ++i)
+		free(base->activequeues[i]);
+	free(base->activequeues);
+
+	if (base->evsel->dealloc != NULL)
+		base->evsel->dealloc(base->evbase);
+
+	free(base);
+}
+
+/* Same as event_base_priority_init(), applied to current_base. */
+int
+event_priority_init(int npriorities)
+{
+	return event_base_priority_init(current_base, npriorities);
+}
+
+/*
+ * (Re)creates the per-priority active queues of base.
+ * Returns -1 if events are currently active, 0 on success; allocation
+ * failures terminate the program through event_err().
+ */
+int
+event_base_priority_init(struct event_base *base, int npriorities)
+{
+	int i;
+
+	if (base->event_count_active)
+		return (-1);
+
+	/*
+	 * Always release an existing set of queues, even when the count
+	 * does not change: they are replaced unconditionally below and
+	 * would otherwise be leaked.
+	 */
+	if (base->nactivequeues) {
+		for (i = 0; i < base->nactivequeues; ++i) {
+			free(base->activequeues[i]);
+		}
+		free(base->activequeues);
+	}
+
+	/*
+	 * Allocate our priority queues: one list pointer per priority.
+	 * calloc() already multiplies count by element size, so the
+	 * element size must not be scaled by npriorities again.
+	 */
+	base->nactivequeues = npriorities;
+	base->activequeues = calloc(base->nactivequeues,
+	    sizeof(struct event_list *));
+	if (base->activequeues == NULL)
+		event_err(1, "%s: calloc", __func__);
+
+	for (i = 0; i < base->nactivequeues; ++i) {
+		base->activequeues[i] = malloc(sizeof(struct event_list));
+		if (base->activequeues[i] == NULL)
+			event_err(1, "%s: malloc", __func__);
+		TAILQ_INIT(base->activequeues[i]);
+	}
+
+	return (0);
+}
+
+/* Returns non-zero while the base still has events registered. */
+static int
+event_haveevents(struct event_base *base)
+{
+	return (base->event_count > 0);
+}
+
+/*
+ * Active events are stored in priority queues.  Lower priorities are always
+ * processed before higher priorities.  Low priority events can starve high
+ * priority ones.
+ */ + +static void +event_process_active(struct event_base *base) +{ + struct event *ev; + struct event_list *activeq = NULL; + int i; + short ncalls; + + if (!base->event_count_active) + return; + + for (i = 0; i < base->nactivequeues; ++i) { + if (TAILQ_FIRST(base->activequeues[i]) != NULL) { + activeq = base->activequeues[i]; + break; + } + } + + assert(activeq != NULL); + + for (ev = TAILQ_FIRST(activeq); ev; ev = TAILQ_FIRST(activeq)) { + event_queue_remove(base, ev, EVLIST_ACTIVE); + + /* Allows deletes to work */ + ncalls = ev->ev_ncalls; + ev->ev_pncalls = &ncalls; + while (ncalls) { + ncalls--; + ev->ev_ncalls = ncalls; + (*ev->ev_callback)((int)ev->ev_fd, ev->ev_res, ev->ev_arg); + if (event_gotsig) + return; + } + } +} + +/* + * Wait continously for events. We exit only if no events are left. + */ + +int +event_dispatch(void) +{ + return (event_loop(0)); +} + +int +event_base_dispatch(struct event_base *event_base) +{ + return (event_base_loop(event_base, 0)); +} + +static void +event_loopexit_cb(int fd, short what, void *arg) +{ + struct event_base *base = arg; + base->event_gotterm = 1; +} + +/* not thread safe */ + +int +event_loopexit(struct timeval *tv) +{ + return (event_once(-1, EV_TIMEOUT, event_loopexit_cb, + current_base, tv)); +} + +int +event_base_loopexit(struct event_base *event_base, struct timeval *tv) +{ + return (event_once(-1, EV_TIMEOUT, event_loopexit_cb, + event_base, tv)); +} + +/* not thread safe */ + +int +event_loop(int flags) +{ + return event_base_loop(current_base, flags); +} + +int +event_base_loop(struct event_base *base, int flags) +{ + const struct eventop *evsel = base->evsel; + void *evbase = base->evbase; + struct timeval tv; + int res, done; + + done = 0; + while (!done) { + /* Calculate the initial events that we are waiting for */ + if (evsel->recalc(base, evbase, 0) == -1) + return (-1); + + /* Terminate the loop if we have been asked to */ + if (base->event_gotterm) { + base->event_gotterm = 0; + break; + } + + 
/* You cannot use this interface for multi-threaded apps */ + while (event_gotsig) { + event_gotsig = 0; + if (event_sigcb) { + res = (*event_sigcb)(); + if (res == -1) { + errno = EINTR; + return (-1); + } + } + } + + /* Check if time is running backwards */ + gettime(&tv); + if (timercmp(&tv, &base->event_tv, <)) { + struct timeval off; + event_debug(("%s: time is running backwards, corrected", + __func__)); + timersub(&base->event_tv, &tv, &off); + timeout_correct(base, &off); + } + base->event_tv = tv; + + if (!base->event_count_active && !(flags & EVLOOP_NONBLOCK)) + timeout_next(base, &tv); + else + timerclear(&tv); + + /* If we have no events, we just exit */ + if (!event_haveevents(base)) { + event_debug(("%s: no events registered.", __func__)); + return (1); + } + + res = evsel->dispatch(base, evbase, &tv); + + if (res == -1) + return (-1); + + timeout_process(base); + + if (base->event_count_active) { + event_process_active(base); + if (!base->event_count_active && (flags & EVLOOP_ONCE)) + done = 1; + } else if (flags & EVLOOP_NONBLOCK) + done = 1; + } + + event_debug(("%s: asked to terminate loop.", __func__)); + return (0); +} + +/* Sets up an event for processing once */ + +struct event_once { + struct event ev; + + void (*cb)(int, short, void *); + void *arg; +}; + +/* One-time callback, it deletes itself */ + +static void +event_once_cb(int fd, short events, void *arg) +{ + struct event_once *eonce = arg; + + (*eonce->cb)(fd, events, eonce->arg); + free(eonce); +} + +/* Schedules an event once */ + +int +event_once(int fd, short events, + void (*callback)(int, short, void *), void *arg, struct timeval *tv) +{ + struct event_once *eonce; + struct timeval etv; + int res; + + /* We cannot support signals that just fire once */ + if (events & EV_SIGNAL) + return (-1); + + if ((eonce = calloc(1, sizeof(struct event_once))) == NULL) + return (-1); + + eonce->cb = callback; + eonce->arg = arg; + + if (events == EV_TIMEOUT) { + if (tv == NULL) { + 
timerclear(&etv); + tv = &etv; + } + + evtimer_set(&eonce->ev, event_once_cb, eonce); + } else if (events & (EV_READ|EV_WRITE)) { + events &= EV_READ|EV_WRITE; + + event_set(&eonce->ev, fd, events, event_once_cb, eonce); + } else { + /* Bad event combination */ + free(eonce); + return (-1); + } + + res = event_add(&eonce->ev, tv); + if (res != 0) { + free(eonce); + return (res); + } + + return (0); +} + +void +event_set(struct event *ev, int fd, short events, + void (*callback)(int, short, void *), void *arg) +{ + /* Take the current base - caller needs to set the real base later */ + ev->ev_base = current_base; + + ev->ev_callback = callback; + ev->ev_arg = arg; + ev->ev_fd = fd; + ev->ev_events = events; + ev->ev_flags = EVLIST_INIT; + ev->ev_ncalls = 0; + ev->ev_pncalls = NULL; + + /* by default, we put new events into the middle priority */ + ev->ev_pri = current_base->nactivequeues/2; +} + +int +event_base_set(struct event_base *base, struct event *ev) +{ + /* Only innocent events may be assigned to a different base */ + if (ev->ev_flags != EVLIST_INIT) + return (-1); + + ev->ev_base = base; + ev->ev_pri = base->nactivequeues/2; + + return (0); +} + +/* + * Set's the priority of an event - if an event is already scheduled + * changing the priority is going to fail. + */ + +int +event_priority_set(struct event *ev, int pri) +{ + if (ev->ev_flags & EVLIST_ACTIVE) + return (-1); + if (pri < 0 || pri >= ev->ev_base->nactivequeues) + return (-1); + + ev->ev_pri = pri; + + return (0); +} + +/* + * Checks if a specific event is pending or scheduled. 
+ */ + +int +event_pending(struct event *ev, short event, struct timeval *tv) +{ + struct timeval now, res; + int flags = 0; + + if (ev->ev_flags & EVLIST_INSERTED) + flags |= (ev->ev_events & (EV_READ|EV_WRITE)); + if (ev->ev_flags & EVLIST_ACTIVE) + flags |= ev->ev_res; + if (ev->ev_flags & EVLIST_TIMEOUT) + flags |= EV_TIMEOUT; + if (ev->ev_flags & EVLIST_SIGNAL) + flags |= EV_SIGNAL; + + event &= (EV_TIMEOUT|EV_READ|EV_WRITE|EV_SIGNAL); + + /* See if there is a timeout that we should report */ + if (tv != NULL && (flags & event & EV_TIMEOUT)) { + gettime(&now); + timersub(&ev->ev_timeout, &now, &res); + /* correctly remap to real time */ + gettimeofday(&now, NULL); + timeradd(&now, &res, tv); + } + + return (flags & event); +} + +int +event_add(struct event *ev, struct timeval *tv) +{ + struct event_base *base = ev->ev_base; + const struct eventop *evsel = base->evsel; + void *evbase = base->evbase; + + event_debug(( + "event_add: event: %p, %s%s%scall %p", + ev, + ev->ev_events & EV_READ ? "EV_READ " : " ", + ev->ev_events & EV_WRITE ? "EV_WRITE " : " ", + tv ? "EV_TIMEOUT " : " ", + ev->ev_callback)); + + assert(!(ev->ev_flags & ~EVLIST_ALL)); + + if (tv != NULL) { + struct timeval now; + + if (ev->ev_flags & EVLIST_TIMEOUT) + event_queue_remove(base, ev, EVLIST_TIMEOUT); + + /* Check if it is active due to a timeout. Rescheduling + * this timeout before the callback can be executed + * removes it from the active list. 
*/ + if ((ev->ev_flags & EVLIST_ACTIVE) && + (ev->ev_res & EV_TIMEOUT)) { + /* See if we are just active executing this + * event in a loop + */ + if (ev->ev_ncalls && ev->ev_pncalls) { + /* Abort loop */ + *ev->ev_pncalls = 0; + } + + event_queue_remove(base, ev, EVLIST_ACTIVE); + } + + gettime(&now); + timeradd(&now, tv, &ev->ev_timeout); + + event_debug(( + "event_add: timeout in %d seconds, call %p", + tv->tv_sec, ev->ev_callback)); + + event_queue_insert(base, ev, EVLIST_TIMEOUT); + } + + if ((ev->ev_events & (EV_READ|EV_WRITE)) && + !(ev->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE))) { + event_queue_insert(base, ev, EVLIST_INSERTED); + + return (evsel->add(evbase, ev)); + } else if ((ev->ev_events & EV_SIGNAL) && + !(ev->ev_flags & EVLIST_SIGNAL)) { + event_queue_insert(base, ev, EVLIST_SIGNAL); + + return (evsel->add(evbase, ev)); + } + + return (0); +} + +int +event_del(struct event *ev) +{ + struct event_base *base; + const struct eventop *evsel; + void *evbase; + + event_debug(("event_del: %p, callback %p", + ev, ev->ev_callback)); + + /* An event without a base has not been added */ + if (ev->ev_base == NULL) + return (-1); + + base = ev->ev_base; + evsel = base->evsel; + evbase = base->evbase; + + assert(!(ev->ev_flags & ~EVLIST_ALL)); + + /* See if we are just active executing this event in a loop */ + if (ev->ev_ncalls && ev->ev_pncalls) { + /* Abort loop */ + *ev->ev_pncalls = 0; + } + + if (ev->ev_flags & EVLIST_TIMEOUT) + event_queue_remove(base, ev, EVLIST_TIMEOUT); + + if (ev->ev_flags & EVLIST_ACTIVE) + event_queue_remove(base, ev, EVLIST_ACTIVE); + + if (ev->ev_flags & EVLIST_INSERTED) { + event_queue_remove(base, ev, EVLIST_INSERTED); + return (evsel->del(evbase, ev)); + } else if (ev->ev_flags & EVLIST_SIGNAL) { + event_queue_remove(base, ev, EVLIST_SIGNAL); + return (evsel->del(evbase, ev)); + } + + return (0); +} + +void +event_active(struct event *ev, int res, short ncalls) +{ + /* We get different kinds of events, add them together */ + if 
(ev->ev_flags & EVLIST_ACTIVE) { + ev->ev_res |= res; + return; + } + + ev->ev_res = res; + ev->ev_ncalls = ncalls; + ev->ev_pncalls = NULL; + event_queue_insert(ev->ev_base, ev, EVLIST_ACTIVE); +} + +int +timeout_next(struct event_base *base, struct timeval *tv) +{ + struct timeval dflt = TIMEOUT_DEFAULT; + + struct timeval now; + struct event *ev; + + if ((ev = RB_MIN(event_tree, &base->timetree)) == NULL) { + *tv = dflt; + return (0); + } + + if (gettime(&now) == -1) + return (-1); + + if (timercmp(&ev->ev_timeout, &now, <=)) { + timerclear(tv); + return (0); + } + + timersub(&ev->ev_timeout, &now, tv); + + assert(tv->tv_sec >= 0); + assert(tv->tv_usec >= 0); + + event_debug(("timeout_next: in %d seconds", tv->tv_sec)); + return (0); +} + +static void +timeout_correct(struct event_base *base, struct timeval *off) +{ + struct event *ev; + + /* + * We can modify the key element of the node without destroying + * the key, beause we apply it to all in the right order. + */ + RB_FOREACH(ev, event_tree, &base->timetree) + timersub(&ev->ev_timeout, off, &ev->ev_timeout); +} + +void +timeout_process(struct event_base *base) +{ + struct timeval now; + struct event *ev, *next; + + gettime(&now); + + for (ev = RB_MIN(event_tree, &base->timetree); ev; ev = next) { + if (timercmp(&ev->ev_timeout, &now, >)) + break; + next = RB_NEXT(event_tree, &base->timetree, ev); + + event_queue_remove(base, ev, EVLIST_TIMEOUT); + + /* delete this event from the I/O queues */ + event_del(ev); + + event_debug(("timeout_process: call %p", + ev->ev_callback)); + event_active(ev, EV_TIMEOUT, 1); + } +} + +void +event_queue_remove(struct event_base *base, struct event *ev, int queue) +{ + int docount = 1; + + if (!(ev->ev_flags & queue)) + event_errx(1, "%s: %p(fd %d) not on queue %x", __func__, + ev, ev->ev_fd, queue); + + if (ev->ev_flags & EVLIST_INTERNAL) + docount = 0; + + if (docount) + base->event_count--; + + ev->ev_flags &= ~queue; + switch (queue) { + case EVLIST_ACTIVE: + if 
(docount) + base->event_count_active--; + TAILQ_REMOVE(base->activequeues[ev->ev_pri], + ev, ev_active_next); + break; + case EVLIST_SIGNAL: + TAILQ_REMOVE(&signalqueue, ev, ev_signal_next); + break; + case EVLIST_TIMEOUT: + RB_REMOVE(event_tree, &base->timetree, ev); + break; + case EVLIST_INSERTED: + TAILQ_REMOVE(&base->eventqueue, ev, ev_next); + break; + default: + event_errx(1, "%s: unknown queue %x", __func__, queue); + } +} + +void +event_queue_insert(struct event_base *base, struct event *ev, int queue) +{ + int docount = 1; + + if (ev->ev_flags & queue) { + /* Double insertion is possible for active events */ + if (queue & EVLIST_ACTIVE) + return; + + event_errx(1, "%s: %p(fd %d) already on queue %x", __func__, + ev, ev->ev_fd, queue); + } + + if (ev->ev_flags & EVLIST_INTERNAL) + docount = 0; + + if (docount) + base->event_count++; + + ev->ev_flags |= queue; + switch (queue) { + case EVLIST_ACTIVE: + if (docount) + base->event_count_active++; + TAILQ_INSERT_TAIL(base->activequeues[ev->ev_pri], + ev,ev_active_next); + break; + case EVLIST_SIGNAL: + TAILQ_INSERT_TAIL(&signalqueue, ev, ev_signal_next); + break; + case EVLIST_TIMEOUT: { + struct event *tmp = RB_INSERT(event_tree, &base->timetree, ev); + assert(tmp == NULL); + break; + } + case EVLIST_INSERTED: + TAILQ_INSERT_TAIL(&base->eventqueue, ev, ev_next); + break; + default: + event_errx(1, "%s: unknown queue %x", __func__, queue); + } +} + +/* Functions for debugging */ + +const char * +event_get_version(void) +{ + return (VERSION); +} + +/* + * No thread-safe interface needed - the information should be the same + * for all threads. + */ + +const char * +event_get_method(void) +{ + return (current_base->evsel->name); +} diff --git a/contrib/pf/libevent/event.h b/contrib/pf/libevent/event.h new file mode 100644 index 0000000..3f2032d --- /dev/null +++ b/contrib/pf/libevent/event.h @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef _EVENT_H_ +#define _EVENT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdarg.h> + +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#undef WIN32_LEAN_AND_MEAN +typedef unsigned char u_char; +typedef unsigned short u_short; +#endif + +#define EVLIST_TIMEOUT 0x01 +#define EVLIST_INSERTED 0x02 +#define EVLIST_SIGNAL 0x04 +#define EVLIST_ACTIVE 0x08 +#define EVLIST_INTERNAL 0x10 +#define EVLIST_INIT 0x80 + +/* EVLIST_X_ Private space: 0x1000-0xf000 */ +#define EVLIST_ALL (0xf000 | 0x9f) + +#define EV_TIMEOUT 0x01 +#define EV_READ 0x02 +#define EV_WRITE 0x04 +#define EV_SIGNAL 0x08 +#define EV_PERSIST 0x10 /* Persistant event */ + +/* Fix so that ppl dont have to run with <sys/queue.h> */ +#ifndef TAILQ_ENTRY +#define _EVENT_DEFINED_TQENTRY +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ +} +#endif /* !TAILQ_ENTRY */ +#ifndef RB_ENTRY +#define _EVENT_DEFINED_RBENTRY +#define RB_ENTRY(type) \ +struct { \ + struct type *rbe_left; /* left element */ \ + struct type *rbe_right; /* right element */ \ + struct type *rbe_parent; /* parent element */ \ + int rbe_color; /* node color */ \ +} +#endif /* !RB_ENTRY */ + +struct event_base; +struct event { + TAILQ_ENTRY (event) ev_next; + TAILQ_ENTRY (event) ev_active_next; + TAILQ_ENTRY (event) ev_signal_next; + RB_ENTRY (event) ev_timeout_node; + + struct event_base *ev_base; + int ev_fd; + short ev_events; + short ev_ncalls; + short *ev_pncalls; /* Allows deletes in callback */ + + struct timeval ev_timeout; + + int ev_pri; /* smaller numbers are higher priority */ + + void (*ev_callback)(int, short, void *arg); + void *ev_arg; + + int ev_res; /* result passed to event callback */ + int ev_flags; +}; + +#define EVENT_SIGNAL(ev) (int)(ev)->ev_fd +#define EVENT_FD(ev) (int)(ev)->ev_fd + +/* + * Key-Value pairs. Can be used for HTTP headers but also for + * query argument parsing. 
+ */ +struct evkeyval { + TAILQ_ENTRY(evkeyval) next; + + char *key; + char *value; +}; + +#ifdef _EVENT_DEFINED_TQENTRY +#undef TAILQ_ENTRY +struct event_list; +struct evkeyvalq; +#undef _EVENT_DEFINED_TQENTRY +#else +TAILQ_HEAD (event_list, event); +TAILQ_HEAD (evkeyvalq, evkeyval); +#endif /* _EVENT_DEFINED_TQENTRY */ +#ifdef _EVENT_DEFINED_RBENTRY +#undef RB_ENTRY +#undef _EVENT_DEFINED_RBENTRY +#endif /* _EVENT_DEFINED_RBENTRY */ + +struct eventop { + char *name; + void *(*init)(void); + int (*add)(void *, struct event *); + int (*del)(void *, struct event *); + int (*recalc)(struct event_base *, void *, int); + int (*dispatch)(struct event_base *, void *, struct timeval *); + void (*dealloc)(void *); +}; + +#define TIMEOUT_DEFAULT {5, 0} + +void *event_init(void); +int event_dispatch(void); +int event_base_dispatch(struct event_base *); +void event_base_free(struct event_base *); + +#define _EVENT_LOG_DEBUG 0 +#define _EVENT_LOG_MSG 1 +#define _EVENT_LOG_WARN 2 +#define _EVENT_LOG_ERR 3 +typedef void (*event_log_cb)(int severity, const char *msg); +void event_set_log_callback(event_log_cb cb); + +/* Associate a different event base with an event */ +int event_base_set(struct event_base *, struct event *); + +#define EVLOOP_ONCE 0x01 +#define EVLOOP_NONBLOCK 0x02 +int event_loop(int); +int event_base_loop(struct event_base *, int); +int event_loopexit(struct timeval *); /* Causes the loop to exit */ +int event_base_loopexit(struct event_base *, struct timeval *); + +#define evtimer_add(ev, tv) event_add(ev, tv) +#define evtimer_set(ev, cb, arg) event_set(ev, -1, 0, cb, arg) +#define evtimer_del(ev) event_del(ev) +#define evtimer_pending(ev, tv) event_pending(ev, EV_TIMEOUT, tv) +#define evtimer_initialized(ev) ((ev)->ev_flags & EVLIST_INIT) + +#define timeout_add(ev, tv) event_add(ev, tv) +#define timeout_set(ev, cb, arg) event_set(ev, -1, 0, cb, arg) +#define timeout_del(ev) event_del(ev) +#define timeout_pending(ev, tv) event_pending(ev, EV_TIMEOUT, tv) 
+#define timeout_initialized(ev) ((ev)->ev_flags & EVLIST_INIT) + +#define signal_add(ev, tv) event_add(ev, tv) +#define signal_set(ev, x, cb, arg) \ + event_set(ev, x, EV_SIGNAL|EV_PERSIST, cb, arg) +#define signal_del(ev) event_del(ev) +#define signal_pending(ev, tv) event_pending(ev, EV_SIGNAL, tv) +#define signal_initialized(ev) ((ev)->ev_flags & EVLIST_INIT) + +void event_set(struct event *, int, short, void (*)(int, short, void *), void *); +int event_once(int, short, void (*)(int, short, void *), void *, struct timeval *); + +int event_add(struct event *, struct timeval *); +int event_del(struct event *); +void event_active(struct event *, int, short); + +int event_pending(struct event *, short, struct timeval *); + +#ifdef WIN32 +#define event_initialized(ev) ((ev)->ev_flags & EVLIST_INIT && (ev)->ev_fd != (int)INVALID_HANDLE_VALUE) +#else +#define event_initialized(ev) ((ev)->ev_flags & EVLIST_INIT) +#endif + +/* Some simple debugging functions */ +const char *event_get_version(void); +const char *event_get_method(void); + +/* These functions deal with event priorities */ + +int event_priority_init(int); +int event_base_priority_init(struct event_base *, int); +int event_priority_set(struct event *, int); + +/* These functions deal with buffering input and output */ + +struct evbuffer { + u_char *buffer; + u_char *orig_buffer; + + size_t misalign; + size_t totallen; + size_t off; + + void (*cb)(struct evbuffer *, size_t, size_t, void *); + void *cbarg; +}; + +/* Just for error reporting - use other constants otherwise */ +#define EVBUFFER_READ 0x01 +#define EVBUFFER_WRITE 0x02 +#define EVBUFFER_EOF 0x10 +#define EVBUFFER_ERROR 0x20 +#define EVBUFFER_TIMEOUT 0x40 + +struct bufferevent; +typedef void (*evbuffercb)(struct bufferevent *, void *); +typedef void (*everrorcb)(struct bufferevent *, short what, void *); + +struct event_watermark { + size_t low; + size_t high; +}; + +struct bufferevent { + struct event ev_read; + struct event ev_write; + + struct 
evbuffer *input; + struct evbuffer *output; + + struct event_watermark wm_read; + struct event_watermark wm_write; + + evbuffercb readcb; + evbuffercb writecb; + everrorcb errorcb; + void *cbarg; + + int timeout_read; /* in seconds */ + int timeout_write; /* in seconds */ + + short enabled; /* events that are currently enabled */ +}; + +struct bufferevent *bufferevent_new(int fd, + evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg); +int bufferevent_base_set(struct event_base *base, struct bufferevent *bufev); +int bufferevent_priority_set(struct bufferevent *bufev, int pri); +void bufferevent_free(struct bufferevent *bufev); +int bufferevent_write(struct bufferevent *bufev, void *data, size_t size); +int bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf); +size_t bufferevent_read(struct bufferevent *bufev, void *data, size_t size); +int bufferevent_enable(struct bufferevent *bufev, short event); +int bufferevent_disable(struct bufferevent *bufev, short event); +void bufferevent_settimeout(struct bufferevent *bufev, + int timeout_read, int timeout_write); + +#define EVBUFFER_LENGTH(x) (x)->off +#define EVBUFFER_DATA(x) (x)->buffer +#define EVBUFFER_INPUT(x) (x)->input +#define EVBUFFER_OUTPUT(x) (x)->output + +struct evbuffer *evbuffer_new(void); +void evbuffer_free(struct evbuffer *); +int evbuffer_expand(struct evbuffer *, size_t); +int evbuffer_add(struct evbuffer *, const void *, size_t); +int evbuffer_remove(struct evbuffer *, void *, size_t); +char *evbuffer_readline(struct evbuffer *); +int evbuffer_add_buffer(struct evbuffer *, struct evbuffer *); +int evbuffer_add_printf(struct evbuffer *, const char *fmt, ...); +int evbuffer_add_vprintf(struct evbuffer *, const char *fmt, va_list ap); +void evbuffer_drain(struct evbuffer *, size_t); +int evbuffer_write(struct evbuffer *, int); +int evbuffer_read(struct evbuffer *, int, int); +u_char *evbuffer_find(struct evbuffer *, const u_char *, size_t); +void 
evbuffer_setcb(struct evbuffer *, void (*)(struct evbuffer *, size_t, size_t, void *), void *); + +/* + * Marshaling tagged data - We assume that all tags are inserted in their + * numeric order - so that unknown tags will always be higher than the + * known ones - and we can just ignore the end of an event buffer. + */ + +void evtag_init(void); + +void evtag_marshal(struct evbuffer *evbuf, u_int8_t tag, const void *data, + u_int32_t len); + +void encode_int(struct evbuffer *evbuf, u_int32_t number); + +void evtag_marshal_int(struct evbuffer *evbuf, u_int8_t tag, + u_int32_t integer); + +void evtag_marshal_string(struct evbuffer *buf, u_int8_t tag, + const char *string); + +void evtag_marshal_timeval(struct evbuffer *evbuf, u_int8_t tag, + struct timeval *tv); + +void evtag_test(void); + +int evtag_unmarshal(struct evbuffer *src, u_int8_t *ptag, + struct evbuffer *dst); +int evtag_peek(struct evbuffer *evbuf, u_int8_t *ptag); +int evtag_peek_length(struct evbuffer *evbuf, u_int32_t *plength); +int evtag_payload_length(struct evbuffer *evbuf, u_int32_t *plength); +int evtag_consume(struct evbuffer *evbuf); + +int evtag_unmarshal_int(struct evbuffer *evbuf, u_int8_t need_tag, + u_int32_t *pinteger); + +int evtag_unmarshal_fixed(struct evbuffer *src, u_int8_t need_tag, void *data, + size_t len); + +int evtag_unmarshal_string(struct evbuffer *evbuf, u_int8_t need_tag, + char **pstring); + +int evtag_unmarshal_timeval(struct evbuffer *evbuf, u_int8_t need_tag, + struct timeval *ptv); + +#ifdef __cplusplus +} +#endif + +#endif /* _EVENT_H_ */ diff --git a/contrib/pf/libevent/evsignal.h b/contrib/pf/libevent/evsignal.h new file mode 100644 index 0000000..5b92bd6 --- /dev/null +++ b/contrib/pf/libevent/evsignal.h @@ -0,0 +1,35 @@ +/* + * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EVSIGNAL_H_ +#define _EVSIGNAL_H_ + +void evsignal_init(void); +void evsignal_process(void); +int evsignal_add(struct event *); +int evsignal_del(struct event *); + +#endif /* _EVSIGNAL_H_ */ diff --git a/contrib/pf/libevent/kqueue.c b/contrib/pf/libevent/kqueue.c new file mode 100644 index 0000000..08369c6 --- /dev/null +++ b/contrib/pf/libevent/kqueue.c @@ -0,0 +1,413 @@ +/* $OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $ */ + +/* + * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/event.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__)
+#define INTPTR(x)	(intptr_t)x
+#else
+#define INTPTR(x)	x
+#endif
+
+#include "event.h"
+#include "log.h"
+
+/* Set in ev_flags while a kevent for the event has been queued by kq_add(). */
+#define EVLIST_X_KQINKERNEL	0x1000
+
+/* Initial capacity (in kevents) of the change list and the result list. */
+#define NEVENT		64
+
+/* Per-backend state returned by kq_init() and passed back as "arg". */
+struct kqop {
+	struct kevent *changes;		/* changes queued for the next kevent() */
+	int nchanges;			/* number of queued changes */
+	struct kevent *events;		/* result buffer filled by kevent() */
+	int nevents;			/* capacity of both arrays */
+	int kq;				/* kqueue descriptor */
+};
+
+void *kq_init	(void);
+int kq_add	(void *, struct event *);
+int kq_del	(void *, struct event *);
+int kq_recalc	(struct event_base *, void *, int);
+int kq_dispatch	(struct event_base *, void *, struct timeval *);
+int kq_insert	(struct kqop *, struct kevent *);
+void kq_dealloc	(void *);
+
+const struct eventop kqops = {
+	"kqueue",
+	kq_init,
+	kq_add,
+	kq_del,
+	kq_recalc,
+	kq_dispatch,
+	kq_dealloc
+};
+
+/*
+ * Create the kqueue backend state.
+ *
+ * Returns a heap-allocated struct kqop, or NULL when the backend is
+ * disabled through the EVENT_NOKQUEUE environment variable, when an
+ * allocation or kqueue() fails, or when the kernel's kqueue is detected
+ * as broken.  Every failure path releases whatever was acquired so far,
+ * including the kqueue descriptor.
+ */
+void *
+kq_init(void)
+{
+	int kq;
+	struct kqop *kqueueop;
+
+	/* Disable kqueue when this environment variable is set */
+	if (getenv("EVENT_NOKQUEUE"))
+		return (NULL);
+
+	if (!(kqueueop = calloc(1, sizeof(struct kqop))))
+		return (NULL);
+
+	/* Initialize the kernel queue */
+
+	if ((kq = kqueue()) == -1) {
+		event_warn("kqueue");
+		free (kqueueop);
+		return (NULL);
+	}
+
+	kqueueop->kq = kq;
+
+	/* Initialize fields */
+	kqueueop->changes = malloc(NEVENT * sizeof(struct kevent));
+	if (kqueueop->changes == NULL) {
+		/* Do not leak the descriptor we just opened. */
+		close(kq);
+		free (kqueueop);
+		return (NULL);
+	}
+	kqueueop->events = malloc(NEVENT * sizeof(struct kevent));
+	if (kqueueop->events == NULL) {
+		close(kq);
+		free (kqueueop->changes);
+		free (kqueueop);
+		return (NULL);
+	}
+	kqueueop->nevents = NEVENT;
+
+	/*
+	 * Check for Mac OS X kqueue bug.  The probe entry is zeroed first
+	 * so that no uninitialized field is handed to the kernel.
+	 */
+	memset(&kqueueop->changes[0], 0, sizeof(struct kevent));
+	kqueueop->changes[0].ident = -1;
+	kqueueop->changes[0].filter = EVFILT_READ;
+	kqueueop->changes[0].flags = EV_ADD;
+	/*
+	 * If kqueue works, then kevent will succeed, and it will
+	 * stick an error in events[0].  If kqueue is broken, then
+	 * kevent will fail.
+	 */
+	if (kevent(kq,
+		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
+	    kqueueop->events[0].ident != -1 ||
+	    kqueueop->events[0].flags != EV_ERROR) {
+		event_warn("%s: detected broken kqueue; not using.", __func__);
+		free(kqueueop->changes);
+		free(kqueueop->events);
+		free(kqueueop);
+		close(kq);
+		return (NULL);
+	}
+
+	return (kqueueop);
+}
+
+/* Nothing to recalculate for kqueue; always succeeds. */
+int
+kq_recalc(struct event_base *base, void *arg, int max)
+{
+	return (0);
+}
+
+/*
+ * Append one change to the pending change list, doubling both the change
+ * list and the result buffer when the change list is full.
+ *
+ * Returns 0 on success, -1 if either array could not be grown.
+ */
+int
+kq_insert(struct kqop *kqop, struct kevent *kev)
+{
+	int nevents = kqop->nevents;
+
+	if (kqop->nchanges == nevents) {
+		struct kevent *newchange;
+		struct kevent *newresult;
+
+		nevents *= 2;
+
+		newchange = realloc(kqop->changes,
+		    nevents * sizeof(struct kevent));
+		if (newchange == NULL) {
+			event_warn("%s: malloc", __func__);
+			return (-1);
+		}
+		kqop->changes = newchange;
+
+		newresult = realloc(kqop->events,
+		    nevents * sizeof(struct kevent));
+
+		/*
+		 * If we fail, we don't have to worry about freeing,
+		 * the next realloc will pick it up.
+		 */
+		if (newresult == NULL) {
+			event_warn("%s: malloc", __func__);
+			return (-1);
+		}
+		kqop->events = newresult;
+
+		kqop->nevents = nevents;
+	}
+
+	memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));
+
+	event_debug(("%s: fd %d %s%s",
+		 __func__, kev->ident,
+		 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
+		 kev->flags == EV_DELETE ? " (del)" : ""));
+
+	return (0);
+}
+
+/* Handler installed by kq_add() for signals that are watched via kqueue. */
+static void
+kq_sighandler(int sig)
+{
+	/* Do nothing here */
+}
+
+/*
+ * Submit the queued changes and wait up to *tv for events, then activate
+ * the struct event attached to each returned kevent.
+ *
+ * Returns 0 on success or when kevent() was interrupted (EINTR); returns
+ * -1 on any other kevent() failure or on an unexpected per-event error,
+ * in which case errno carries the error code.
+ */
+int
+kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+	struct kqop *kqop = arg;
+	struct kevent *changes = kqop->changes;
+	struct kevent *events = kqop->events;
+	struct event *ev;
+	struct timespec ts;
+	int i, res;
+
+	TIMEVAL_TO_TIMESPEC(tv, &ts);
+
+	res = kevent(kqop->kq, changes, kqop->nchanges,
+	    events, kqop->nevents, &ts);
+	/* The change list is consumed whether or not kevent() succeeded. */
+	kqop->nchanges = 0;
+	if (res == -1) {
+		if (errno != EINTR) {
+			event_warn("kevent");
+			return (-1);
+		}
+
+		return (0);
+	}
+
+	event_debug(("%s: kevent reports %d", __func__, res));
+
+	for (i = 0; i < res; i++) {
+		int which = 0;
+
+		if (events[i].flags & EV_ERROR) {
+			/*
+			 * Error messages that can happen, when a delete fails.
+			 *   EBADF happens when the file descriptor has been
+			 *   closed,
+			 *   ENOENT when the file descriptor was closed and
+			 *   then reopened.
+			 *   EINVAL for some reasons not understood; EINVAL
+			 *   should not be returned ever; but FreeBSD does :-\
+			 * An error is also indicated when a callback deletes
+			 * an event we are still processing.  In that case
+			 * the data field is set to ENOENT.
+			 */
+			if (events[i].data == EBADF ||
+			    events[i].data == EINVAL ||
+			    events[i].data == ENOENT)
+				continue;
+			errno = events[i].data;
+			return (-1);
+		}
+
+		ev = (struct event *)events[i].udata;
+
+		if (events[i].filter == EVFILT_READ) {
+			which |= EV_READ;
+		} else if (events[i].filter == EVFILT_WRITE) {
+			which |= EV_WRITE;
+		} else if (events[i].filter == EVFILT_SIGNAL) {
+			which |= EV_SIGNAL;
+		}
+
+		if (!which)
+			continue;
+
+		if (!(ev->ev_events & EV_PERSIST))
+			event_del(ev);
+
+		/* For signals the kevent data field is passed as the call count. */
+		event_active(ev, which,
+		    ev->ev_events & EV_SIGNAL ? events[i].data : 1);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Queue EV_ADD changes for the event: one EVFILT_SIGNAL entry for a
+ * signal event, otherwise one entry per requested direction
+ * (EVFILT_READ and/or EVFILT_WRITE).  Non-persistent events are
+ * registered with EV_ONESHOT.
+ *
+ * Returns 0 on success, -1 if a change could not be queued or the
+ * signal handler could not be installed.
+ */
+int
+kq_add(void *arg, struct event *ev)
+{
+	struct kqop *kqop = arg;
+	struct kevent kev;
+
+	if (ev->ev_events & EV_SIGNAL) {
+		int nsignal = EVENT_SIGNAL(ev);
+
+		memset(&kev, 0, sizeof(kev));
+		kev.ident = nsignal;
+		kev.filter = EVFILT_SIGNAL;
+		kev.flags = EV_ADD;
+		if (!(ev->ev_events & EV_PERSIST))
+			kev.flags |= EV_ONESHOT;
+		kev.udata = INTPTR(ev);
+
+		if (kq_insert(kqop, &kev) == -1)
+			return (-1);
+
+		if (signal(nsignal, kq_sighandler) == SIG_ERR)
+			return (-1);
+
+		ev->ev_flags |= EVLIST_X_KQINKERNEL;
+		return (0);
+	}
+
+	if (ev->ev_events & EV_READ) {
+		memset(&kev, 0, sizeof(kev));
+		kev.ident = ev->ev_fd;
+		kev.filter = EVFILT_READ;
+#ifdef NOTE_EOF
+		/* Make it behave like select() and poll() */
+		kev.fflags = NOTE_EOF;
+#endif
+		kev.flags = EV_ADD;
+		if (!(ev->ev_events & EV_PERSIST))
+			kev.flags |= EV_ONESHOT;
+		kev.udata = INTPTR(ev);
+
+		if (kq_insert(kqop, &kev) == -1)
+			return (-1);
+
+		ev->ev_flags |= EVLIST_X_KQINKERNEL;
+	}
+
+	if (ev->ev_events & EV_WRITE) {
+		memset(&kev, 0, sizeof(kev));
+		kev.ident = ev->ev_fd;
+		kev.filter = EVFILT_WRITE;
+		kev.flags = EV_ADD;
+		if (!(ev->ev_events & EV_PERSIST))
+			kev.flags |= EV_ONESHOT;
+		kev.udata = INTPTR(ev);
+
+		if (kq_insert(kqop, &kev) == -1)
+			return (-1);
+
+		ev->ev_flags |= EVLIST_X_KQINKERNEL;
+	}
+
+	return (0);
+}
+
+/*
+ * Queue EV_DELETE changes mirroring what kq_add() registered.  Events
+ * that were never marked EVLIST_X_KQINKERNEL are ignored.  For signal
+ * events the signal disposition is reset to SIG_DFL.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+kq_del(void *arg, struct event *ev)
+{
+	struct kqop *kqop = arg;
+	struct kevent kev;
+
+	if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
+		return (0);
+
+	if (ev->ev_events & EV_SIGNAL) {
+		int nsignal = EVENT_SIGNAL(ev);
+
+		memset(&kev, 0, sizeof(kev));
+		kev.ident = nsignal;
+		kev.filter = EVFILT_SIGNAL;
+		kev.flags = EV_DELETE;
+
+		if (kq_insert(kqop, &kev) == -1)
+			return (-1);
+
+		if (signal(nsignal, SIG_DFL) == SIG_ERR)
+			return (-1);
+
+		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+		return (0);
+	}
+
+	if (ev->ev_events & EV_READ) {
+		memset(&kev, 0, sizeof(kev));
+		kev.ident = ev->ev_fd;
+		kev.filter = EVFILT_READ;
+		kev.flags = EV_DELETE;
+
+		if (kq_insert(kqop, &kev) == -1)
+			return (-1);
+
+		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+	}
+
+	if (ev->ev_events & EV_WRITE) {
+		memset(&kev, 0, sizeof(kev));
+		kev.ident = ev->ev_fd;
+		kev.filter = EVFILT_WRITE;
+		kev.flags = EV_DELETE;
+
+		if (kq_insert(kqop, &kev) == -1)
+			return (-1);
+
+		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+	}
+
+	return (0);
+}
+
+/*
+ * Release everything owned by the backend state created by kq_init().
+ * kq_init() only returns a state whose kq field holds a descriptor
+ * obtained from a successful kqueue() call, and 0 is a valid descriptor,
+ * so the test is ">= 0" rather than "non-zero".
+ */
+void
+kq_dealloc(void *arg)
+{
+	struct kqop *kqop = arg;
+
+	free(kqop->changes);
+	free(kqop->events);
+	if (kqop->kq >= 0)
+		close(kqop->kq);
+	memset(kqop, 0, sizeof(struct kqop));
+	free(kqop);
+}
diff --git a/contrib/pf/libevent/log.c b/contrib/pf/libevent/log.c
new file mode 100644
index 0000000..c9275e3
--- /dev/null
+++ b/contrib/pf/libevent/log.c
@@ -0,0 +1,219 @@
+/* $OpenBSD: err.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * log.c
+ *
+ * Based on err.c, which was adapted from OpenBSD libc *err* *warn* code.
+ *
+ * Copyright (c) 2005 Nick Mathewson <nickm@freehaven.net>
+ *
+ * Copyright (c) 2000 Dug Song <dugsong@monkey.org>
+ *
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#include "misc.h"
+#endif
+#include <sys/types.h>
+#include <sys/tree.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include "event.h"
+
+#include "log.h"
+
+static void _warn_helper(int severity, int log_errno, const char *fmt,
+    va_list ap);
+static void event_log(int severity, const char *msg);
+
+/*
+ * Bounded vsnprintf() with one failure convention: the buffer is always
+ * NUL-terminated, and -1 is returned for a zero-sized buffer, for a
+ * formatting error, and for truncated output alike.  Otherwise the number
+ * of characters written is returned.
+ */
+static int
+event_vsnprintf(char *str, size_t size, const char *format, va_list args)
+{
+	int written;
+
+	if (size == 0)
+		return -1;
+#ifdef WIN32
+	written = _vsnprintf(str, size, format, args);
+#else
+	written = vsnprintf(str, size, format, args);
+#endif
+	str[size-1] = '\0';
+	/* Platforms report overflow differently; fold both kinds into -1. */
+	return (written < 0 || ((size_t)written) >= size) ? -1 : written;
+}
+
+/* Variadic front end for event_vsnprintf(); same return convention. */
+static int
+event_snprintf(char *str, size_t size, const char *format, ...)
+{
+	va_list args;
+	int written;
+
+	va_start(args, format);
+	written = event_vsnprintf(str, size, format, args);
+	va_end(args);
+	return written;
+}
+
+/* Log at ERR severity with the errno text appended, then exit(eval). */
+void
+event_err(int eval, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	_warn_helper(_EVENT_LOG_ERR, errno, fmt, args);
+	va_end(args);
+	exit(eval);
+}
+
+/* Log at WARN severity with the errno text appended. */
+void
+event_warn(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	_warn_helper(_EVENT_LOG_WARN, errno, fmt, args);
+	va_end(args);
+}
+
+/* Log at ERR severity without any errno text, then exit(eval). */
+void
+event_errx(int eval, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	_warn_helper(_EVENT_LOG_ERR, -1, fmt, args);
+	va_end(args);
+	exit(eval);
+}
+
+/* Log at WARN severity without any errno text. */
+void
+event_warnx(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	_warn_helper(_EVENT_LOG_WARN, -1, fmt, args);
+	va_end(args);
+}
+
+/* Log at MSG severity without any errno text. */
+void
+event_msgx(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	_warn_helper(_EVENT_LOG_MSG, -1, fmt, args);
+	va_end(args);
+}
+
+/* Log at DEBUG severity without any errno text. */
+void
+_event_debugx(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	_warn_helper(_EVENT_LOG_DEBUG, -1, fmt, args);
+	va_end(args);
+}
+
+/*
+ * Shared back end of the public logging calls: format the message into a
+ * fixed 1024-byte buffer, append ": <strerror(log_errno)>" when log_errno
+ * is non-negative and at least three bytes of room remain, and pass the
+ * result to event_log().  A NULL fmt produces an empty message.
+ */
+static void
+_warn_helper(int severity, int log_errno, const char *fmt, va_list ap)
+{
+	char text[1024];
+
+	if (fmt == NULL)
+		text[0] = '\0';
+	else
+		event_vsnprintf(text, sizeof(text), fmt, ap);
+
+	if (log_errno >= 0) {
+		size_t used = strlen(text);
+
+		if (used < sizeof(text) - 3)
+			event_snprintf(text + used, sizeof(text) - used,
+			    ": %s", strerror(log_errno));
+	}
+
+	event_log(severity, text);
+}
+
+static event_log_cb log_fn = NULL;
+
+/* Install cb as the log sink; NULL restores the stderr default. */
+void
+event_set_log_callback(event_log_cb cb)
+{
+	log_fn = cb;
+}
+
+/*
+ * Deliver one finished message: to the installed callback if there is
+ * one, otherwise to stderr as "[severity] message".
+ */
+static void
+event_log(int severity, const char *msg)
+{
+	const char *label;
+
+	if (log_fn) {
+		log_fn(severity, msg);
+		return;
+	}
+
+	switch (severity) {
+	case _EVENT_LOG_DEBUG:
+		label = "debug";
+		break;
+	case _EVENT_LOG_MSG:
+		label = "msg";
+		break;
+	case _EVENT_LOG_WARN:
+		label = "warn";
+		break;
+	case _EVENT_LOG_ERR:
+		label = "err";
+		break;
+	default:
+		label = "???";
+		break;
+	}
+	(void)fprintf(stderr, "[%s] %s\n", label, msg);
+}
diff --git a/contrib/pf/libevent/log.h b/contrib/pf/libevent/log.h
new file mode 100644
index 0000000..1f843cf
--- /dev/null
+++ b/contrib/pf/libevent/log.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _LOG_H_
+#define _LOG_H_
+
+void event_err(int eval, const char *fmt, ...);	/* error + errno text, then exit(eval) */
+void event_warn(const char *fmt, ...);	/* warning + errno text */
+void event_errx(int eval, const char *fmt, ...);	/* error, no errno text, then exit(eval) */
+void event_warnx(const char *fmt, ...);	/* warning, no errno text */
+void event_msgx(const char *fmt, ...);	/* informational message, no errno text */
+void _event_debugx(const char *fmt, ...);	/* debug message; normally reached via event_debug() */
+
+#ifdef USE_DEBUG	/* event_debug takes one parenthesized argument list: event_debug(("fmt", args)); */
+#define event_debug(x) _event_debugx x
+#else	/* without USE_DEBUG the call expands to an empty statement and x is not evaluated */
+#define event_debug(x) do {;} while (0)
+#endif
+
+#endif
diff --git a/contrib/pf/libevent/poll.c b/contrib/pf/libevent/poll.c
new file mode 100644
index 0000000..14ca845
--- /dev/null
+++ b/contrib/pf/libevent/poll.c
@@ -0,0 +1,388 @@
+/* $OpenBSD: poll.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/tree.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef CHECK_INVARIANTS
+#include <assert.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
+extern volatile sig_atomic_t evsignal_caught;
+
+struct pollop {
+	int event_count;		/* Highest number alloc */
+	int nfds;			/* Size of event_* */
+	int fd_count;			/* Size of idxplus1_by_fd */
+	struct pollfd *event_set;
+	struct event **event_r_back;
+	struct event **event_w_back;
+	int *idxplus1_by_fd; /* Index into event_set by fd; we add 1 so
+			      * that 0 (which is easy to memset) can mean
+			      * "no entry." */
+};
+
+void *poll_init	(void);
+int poll_add		(void *, struct event *);
+int poll_del		(void *, struct event *);
+int poll_recalc		(struct event_base *, void *, int);
+int poll_dispatch	(struct event_base *, void *, struct timeval *);
+void poll_dealloc	(void *);
+
+const struct eventop pollops = {
+	"poll",
+	poll_init,
+	poll_add,
+	poll_del,
+	poll_recalc,
+	poll_dispatch,
+	poll_dealloc
+};
+
+/*
+ * Create the poll backend state.  Returns a zeroed struct pollop, or NULL
+ * when the backend is disabled through the EVENT_NOPOLL environment
+ * variable or the allocation fails.  All arrays start empty and are
+ * grown on demand by poll_add().
+ */
+void *
+poll_init(void)
+{
+	struct pollop *pollop;
+
+	/* Disable poll when this environment variable is set */
+	if (getenv("EVENT_NOPOLL"))
+		return (NULL);
+
+	if (!(pollop = calloc(1, sizeof(struct pollop))))
+		return (NULL);
+
+	evsignal_init();
+
+	return (pollop);
+}
+
+/*
+ * Called with the highest fd that we know about.  If it is 0, completely
+ * recalculate everything.
+ *
+ * The poll backend keeps no derived state, so this is a no-op.
+ */
+
+int
+poll_recalc(struct event_base *base, void *arg, int max)
+{
+	return (0);
+}
+
+#ifdef CHECK_INVARIANTS
+/*
+ * Debug-only consistency check: every fd-to-index mapping must point at a
+ * pollfd for that fd whose requested directions have matching back
+ * pointers, and every pollfd must be reachable through the mapping.
+ */
+static void
+poll_check_ok(struct pollop *pop)
+{
+	int i, idx;
+	struct event *ev;
+
+	for (i = 0; i < pop->fd_count; ++i) {
+		idx = pop->idxplus1_by_fd[i]-1;
+		if (idx < 0)
+			continue;
+		assert(pop->event_set[idx].fd == i);
+		if (pop->event_set[idx].events & POLLIN) {
+			ev = pop->event_r_back[idx];
+			assert(ev);
+			assert(ev->ev_events & EV_READ);
+			assert(ev->ev_fd == i);
+		}
+		if (pop->event_set[idx].events & POLLOUT) {
+			ev = pop->event_w_back[idx];
+			assert(ev);
+			assert(ev->ev_events & EV_WRITE);
+			assert(ev->ev_fd == i);
+		}
+	}
+	for (i = 0; i < pop->nfds; ++i) {
+		struct pollfd *pfd = &pop->event_set[i];
+		assert(pop->idxplus1_by_fd[pfd->fd] == i+1);
+	}
+}
+#else
+#define poll_check_ok(pop)
+#endif
+
+/*
+ * Wait in poll() for at most *tv (rounded up to whole milliseconds) and
+ * activate the read and/or write event registered for every descriptor
+ * that reports activity.  POLLHUP and POLLERR wake both directions.
+ *
+ * Returns 0 on success, on timeout and when interrupted by a signal
+ * (pending signals are processed first); returns -1 on any other poll()
+ * failure.
+ */
+int
+poll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+	int res, i, sec, nfds;
+	struct pollop *pop = arg;
+
+	poll_check_ok(pop);
+	sec = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
+	nfds = pop->nfds;
+	res = poll(pop->event_set, nfds, sec);
+
+	if (res == -1) {
+		if (errno != EINTR) {
+			event_warn("poll");
+			return (-1);
+		}
+
+		evsignal_process();
+		return (0);
+	} else if (evsignal_caught)
+		evsignal_process();
+
+	event_debug(("%s: poll reports %d", __func__, res));
+
+	if (res == 0)
+		return (0);
+
+	for (i = 0; i < nfds; i++) {
+		int what = pop->event_set[i].revents;
+		struct event *r_ev = NULL, *w_ev = NULL;
+		if (!what)
+			continue;
+
+		/* From here on res is reused as the EV_* mask for this fd. */
+		res = 0;
+
+		/* If the file gets closed notify */
+		if (what & (POLLHUP|POLLERR))
+			what |= POLLIN|POLLOUT;
+		if (what & POLLIN) {
+			res |= EV_READ;
+			r_ev = pop->event_r_back[i];
+		}
+		if (what & POLLOUT) {
+			res |= EV_WRITE;
+			w_ev = pop->event_w_back[i];
+		}
+		if (res == 0)
+			continue;
+
+		if (r_ev && (res & r_ev->ev_events)) {
+			if (!(r_ev->ev_events & EV_PERSIST))
+				event_del(r_ev);
+			event_active(r_ev, res & r_ev->ev_events, 1);
+		}
+		/* One event may own both directions; activate it only once. */
+		if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) {
+			if (!(w_ev->ev_events & EV_PERSIST))
+				event_del(w_ev);
+			event_active(w_ev, res & w_ev->ev_events, 1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Register an event.  Signal events are handed to evsignal_add().  For
+ * read/write events the pollfd array, the two back-pointer arrays and the
+ * fd-to-index table are grown as needed, then the pollfd for ev->ev_fd is
+ * created or updated.
+ *
+ * Returns 0 on success, -1 on allocation failure or when ev->ev_fd is
+ * negative (a negative fd cannot index the fd table).
+ */
+int
+poll_add(void *arg, struct event *ev)
+{
+	struct pollop *pop = arg;
+	struct pollfd *pfd = NULL;
+	int i;
+
+	if (ev->ev_events & EV_SIGNAL)
+		return (evsignal_add(ev));
+	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
+		return (0);
+	if (ev->ev_fd < 0)
+		return (-1);
+
+	poll_check_ok(pop);
+	if (pop->nfds + 1 >= pop->event_count) {
+		struct pollfd *tmp_event_set;
+		struct event **tmp_event_r_back;
+		struct event **tmp_event_w_back;
+		int tmp_event_count;
+
+		if (pop->event_count < 32)
+			tmp_event_count = 32;
+		else
+			tmp_event_count = pop->event_count * 2;
+
+		/* We need more file descriptors */
+		tmp_event_set = realloc(pop->event_set,
+				 tmp_event_count * sizeof(struct pollfd));
+		if (tmp_event_set == NULL) {
+			event_warn("realloc");
+			return (-1);
+		}
+		pop->event_set = tmp_event_set;
+
+		tmp_event_r_back = realloc(pop->event_r_back,
+			    tmp_event_count * sizeof(struct event *));
+		if (tmp_event_r_back == NULL) {
+			/* event_set overallocated; that's okay. */
+			event_warn("realloc");
+			return (-1);
+		}
+		pop->event_r_back = tmp_event_r_back;
+
+		tmp_event_w_back = realloc(pop->event_w_back,
+			    tmp_event_count * sizeof(struct event *));
+		if (tmp_event_w_back == NULL) {
+			/* event_set and event_r_back overallocated; that's
+			 * okay. */
+			event_warn("realloc");
+			return (-1);
+		}
+		pop->event_w_back = tmp_event_w_back;
+
+		pop->event_count = tmp_event_count;
+	}
+	if (ev->ev_fd >= pop->fd_count) {
+		int *tmp_idxplus1_by_fd;
+		int new_count;
+		if (pop->fd_count < 32)
+			new_count = 32;
+		else
+			new_count = pop->fd_count * 2;
+		while (new_count <= ev->ev_fd)
+			new_count *= 2;
+		tmp_idxplus1_by_fd =
+			realloc(pop->idxplus1_by_fd, new_count * sizeof(int));
+		if (tmp_idxplus1_by_fd == NULL) {
+			event_warn("realloc");
+			return (-1);
+		}
+		pop->idxplus1_by_fd = tmp_idxplus1_by_fd;
+		/* New slots must read as "no entry". */
+		memset(pop->idxplus1_by_fd + pop->fd_count,
+		       0, sizeof(int)*(new_count - pop->fd_count));
+		pop->fd_count = new_count;
+	}
+
+	i = pop->idxplus1_by_fd[ev->ev_fd] - 1;
+	if (i >= 0) {
+		pfd = &pop->event_set[i];
+	} else {
+		i = pop->nfds++;
+		pfd = &pop->event_set[i];
+		pfd->events = 0;
+		pfd->fd = ev->ev_fd;
+		pop->event_w_back[i] = pop->event_r_back[i] = NULL;
+		pop->idxplus1_by_fd[ev->ev_fd] = i + 1;
+	}
+
+	pfd->revents = 0;
+	if (ev->ev_events & EV_WRITE) {
+		pfd->events |= POLLOUT;
+		pop->event_w_back[i] = ev;
+	}
+	if (ev->ev_events & EV_READ) {
+		pfd->events |= POLLIN;
+		pop->event_r_back[i] = ev;
+	}
+	poll_check_ok(pop);
+
+	return (0);
+}
+
+/*
+ * Unregister an event.  Signal events are handed to evsignal_del().  For
+ * read/write events the requested directions are cleared from the fd's
+ * pollfd; once no direction is left the pollfd is dropped and the last
+ * array entry is moved into its place.
+ *
+ * Returns 0 on success, -1 when ev->ev_fd has no entry (including an fd
+ * outside the fd table, which is rejected before the table is indexed).
+ */
+
+int
+poll_del(void *arg, struct event *ev)
+{
+	struct pollop *pop = arg;
+	struct pollfd *pfd = NULL;
+	int i;
+
+	if (ev->ev_events & EV_SIGNAL)
+		return (evsignal_del(ev));
+
+	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
+		return (0);
+
+	poll_check_ok(pop);
+	if (ev->ev_fd < 0 || ev->ev_fd >= pop->fd_count)
+		return (-1);
+	i = pop->idxplus1_by_fd[ev->ev_fd] - 1;
+	if (i < 0)
+		return (-1);
+
+	/* Do we still want to read or write? */
+	pfd = &pop->event_set[i];
+	if (ev->ev_events & EV_READ) {
+		pfd->events &= ~POLLIN;
+		pop->event_r_back[i] = NULL;
+	}
+	if (ev->ev_events & EV_WRITE) {
+		pfd->events &= ~POLLOUT;
+		pop->event_w_back[i] = NULL;
+	}
+	poll_check_ok(pop);
+	if (pfd->events)
+		/* Another event cares about that fd. */
+		return (0);
+
+	/* Okay, so we aren't interested in that fd anymore. */
+	pop->idxplus1_by_fd[ev->ev_fd] = 0;
+
+	--pop->nfds;
+	if (i != pop->nfds) {
+		/*
+		 * Shift the last pollfd down into the now-unoccupied
+		 * position.
+		 */
+		memcpy(&pop->event_set[i], &pop->event_set[pop->nfds],
+		       sizeof(struct pollfd));
+		pop->event_r_back[i] = pop->event_r_back[pop->nfds];
+		pop->event_w_back[i] = pop->event_w_back[pop->nfds];
+		pop->idxplus1_by_fd[pop->event_set[i].fd] = i + 1;
+	}
+
+	poll_check_ok(pop);
+	return (0);
+}
+
+/* Release every array owned by the backend state, then the state itself. */
+void
+poll_dealloc(void *arg)
+{
+	struct pollop *pop = arg;
+
+	free(pop->event_set);
+	free(pop->event_r_back);
+	free(pop->event_w_back);
+	free(pop->idxplus1_by_fd);
+
+	memset(pop, 0, sizeof(struct pollop));
+	free(pop);
+}
diff --git a/contrib/pf/libevent/select.c b/contrib/pf/libevent/select.c
new file mode 100644
index 0000000..6ce81a2
--- /dev/null
+++ b/contrib/pf/libevent/select.c
@@ -0,0 +1,370 @@
+/* $OpenBSD: select.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/tree.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef CHECK_INVARIANTS
+#include <assert.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
+#ifndef howmany
+#define        howmany(x, y)   (((x)+((y)-1))/(y))
+#endif
+
+extern volatile sig_atomic_t evsignal_caught;
+
+struct selectop {
+	int event_fds;		/* Highest fd in fd set */
+	int event_fdsz;		/* size in bytes of each fd_set below */
+	fd_set *event_readset_in;	/* fds registered for reading */
+	fd_set *event_writeset_in;	/* fds registered for writing */
+	fd_set *event_readset_out;	/* scratch copy handed to select() */
+	fd_set *event_writeset_out;	/* scratch copy handed to select() */
+	struct event **event_r_by_fd;	/* read event for each fd, or NULL */
+	struct event **event_w_by_fd;	/* write event for each fd, or NULL */
+};
+
+void *select_init	(void);
+int select_add		(void *, struct event *);
+int select_del		(void *, struct event *);
+int select_recalc	(struct event_base *, void *, int);
+int select_dispatch	(struct event_base *, void *, struct timeval *);
+void select_dealloc     (void *);
+
+const struct eventop selectops = {
+	"select",
+	select_init,
+	select_add,
+	select_del,
+	select_recalc,
+	select_dispatch,
+	select_dealloc
+};
+
+static int select_resize(struct selectop *sop, int fdsz);
+
+/*
+ * Create the select backend state with room for at least 33 descriptors.
+ *
+ * Returns the new state, or NULL when the backend is disabled through
+ * the EVENT_NOSELECT environment variable or when an allocation fails.
+ * A failed initial resize is treated as a failed init: without it the
+ * fd_set pointers would stay NULL and be dereferenced by the first
+ * select_dispatch() or select_add().
+ */
+void *
+select_init(void)
+{
+	struct selectop *sop;
+
+	/* Disable select when this environment variable is set */
+	if (getenv("EVENT_NOSELECT"))
+		return (NULL);
+
+	if (!(sop = calloc(1, sizeof(struct selectop))))
+		return (NULL);
+
+	if (select_resize(sop, howmany(32 + 1, NFDBITS)*sizeof(fd_mask)) == -1) {
+		/* Frees whichever arrays were allocated before the failure. */
+		select_dealloc(sop);
+		return (NULL);
+	}
+
+	evsignal_init();
+
+	return (sop);
+}
+
+#ifdef CHECK_INVARIANTS
+/*
+ * Debug-only consistency check: for every fd up to event_fds, membership
+ * in an input set must agree with the matching by-fd back pointer.
+ */
+static void
+check_selectop(struct selectop *sop)
+{
+	int i;
+	for (i=0;i<=sop->event_fds;++i) {
+		if (FD_ISSET(i, sop->event_readset_in)) {
+			assert(sop->event_r_by_fd[i]);
+			assert(sop->event_r_by_fd[i]->ev_events & EV_READ);
+			assert(sop->event_r_by_fd[i]->ev_fd == i);
+		} else {
+			assert(! sop->event_r_by_fd[i]);
+		}
+		if (FD_ISSET(i, sop->event_writeset_in)) {
+			assert(sop->event_w_by_fd[i]);
+			assert(sop->event_w_by_fd[i]->ev_events & EV_WRITE);
+			assert(sop->event_w_by_fd[i]->ev_fd == i);
+		} else {
+			assert(! sop->event_w_by_fd[i]);
+		}
+	}
+
+}
+#else
+#define check_selectop(sop) do { (void) sop; } while (0)
+#endif
+
+/*
+ * Called with the highest fd that we know about.  If it is 0, completely
+ * recalculate everything.
+ *
+ * The select backend has nothing to recompute; only the invariant check
+ * runs.
+ */
+
+int
+select_recalc(struct event_base *base, void *arg, int max)
+{
+	struct selectop *sop = arg;
+
+	check_selectop(sop);
+
+	return (0);
+}
+
+/*
+ * Copy the registered sets into the scratch sets, wait in select() for at
+ * most *tv, and activate the read and/or write event of every descriptor
+ * reported ready.
+ *
+ * Returns 0 on success and when interrupted by a signal (pending signals
+ * are processed first); returns -1 on any other select() failure.
+ */
+int
+select_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+	int res, i;
+	struct selectop *sop = arg;
+
+	check_selectop(sop);
+
+	memcpy(sop->event_readset_out, sop->event_readset_in,
+	       sop->event_fdsz);
+	memcpy(sop->event_writeset_out, sop->event_writeset_in,
+	       sop->event_fdsz);
+
+	res = select(sop->event_fds + 1, sop->event_readset_out,
+	    sop->event_writeset_out, NULL, tv);
+
+	check_selectop(sop);
+
+	if (res == -1) {
+		if (errno != EINTR) {
+			event_warn("select");
+			return (-1);
+		}
+
+		evsignal_process();
+		return (0);
+	} else if (evsignal_caught)
+		evsignal_process();
+
+	event_debug(("%s: select reports %d", __func__, res));
+
+	check_selectop(sop);
+	for (i = 0; i <= sop->event_fds; ++i) {
+		struct event *r_ev = NULL, *w_ev = NULL;
+		/* From here on res is reused as the EV_* mask for this fd. */
+		res = 0;
+		if (FD_ISSET(i, sop->event_readset_out)) {
+			r_ev = sop->event_r_by_fd[i];
+			res |= EV_READ;
+		}
+		if (FD_ISSET(i, sop->event_writeset_out)) {
+			w_ev = sop->event_w_by_fd[i];
+			res |= EV_WRITE;
+		}
+		if (r_ev && (res & r_ev->ev_events)) {
+			if (!(r_ev->ev_events & EV_PERSIST))
+				event_del(r_ev);
+			event_active(r_ev, res & r_ev->ev_events, 1);
+		}
+		/* One event may own both directions; activate it only once. */
+		if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) {
+			if (!(w_ev->ev_events & EV_PERSIST))
+				event_del(w_ev);
+			event_active(w_ev, res & w_ev->ev_events, 1);
+		}
+	}
+	check_selectop(sop);
+
+	return (0);
+}
+
+
+/*
+ * Grow the four fd_sets to fdsz bytes and the two by-fd arrays to the
+ * matching number of descriptors, zero-filling the newly added tail of
+ * the input sets and of the by-fd arrays.
+ *
+ * Returns 0 on success.  On allocation failure returns -1 and leaves
+ * event_fdsz unchanged; arrays that were already grown keep their larger
+ * size, which a later call or select_dealloc() handles.
+ */
+static int
+select_resize(struct selectop *sop, int fdsz)
+{
+	int n_events, n_events_old;
+
+	fd_set *readset_in = NULL;
+	fd_set *writeset_in = NULL;
+	fd_set *readset_out = NULL;
+	fd_set *writeset_out = NULL;
+	struct event **r_by_fd = NULL;
+	struct event **w_by_fd = NULL;
+
+	n_events = (fdsz/sizeof(fd_mask)) * NFDBITS;
+	n_events_old = (sop->event_fdsz/sizeof(fd_mask)) * NFDBITS;
+
+	if (sop->event_readset_in)
+		check_selectop(sop);
+
+	if ((readset_in = realloc(sop->event_readset_in, fdsz)) == NULL)
+		goto error;
+	sop->event_readset_in = readset_in;
+	if ((readset_out = realloc(sop->event_readset_out, fdsz)) == NULL)
+		goto error;
+	sop->event_readset_out = readset_out;
+	if ((writeset_in = realloc(sop->event_writeset_in, fdsz)) == NULL)
+		goto error;
+	sop->event_writeset_in = writeset_in;
+	if ((writeset_out = realloc(sop->event_writeset_out, fdsz)) == NULL)
+		goto error;
+	sop->event_writeset_out = writeset_out;
+	if ((r_by_fd = realloc(sop->event_r_by_fd,
+		 n_events*sizeof(struct event*))) == NULL)
+		goto error;
+	sop->event_r_by_fd = r_by_fd;
+	if ((w_by_fd = realloc(sop->event_w_by_fd,
+		 n_events * sizeof(struct event*))) == NULL)
+		goto error;
+	sop->event_w_by_fd = w_by_fd;
+
+	memset((char *)sop->event_readset_in + sop->event_fdsz, 0,
+	    fdsz - sop->event_fdsz);
+	memset((char *)sop->event_writeset_in + sop->event_fdsz, 0,
+	    fdsz - sop->event_fdsz);
+	memset(sop->event_r_by_fd + n_events_old, 0,
+	    (n_events-n_events_old) * sizeof(struct event*));
+	memset(sop->event_w_by_fd + n_events_old, 0,
+	    (n_events-n_events_old) * sizeof(struct event*));
+
+	sop->event_fdsz = fdsz;
+	check_selectop(sop);
+
+	return (0);
+
+ error:
+	event_warn("malloc");
+	return (-1);
+}
+
+
+/*
+ * Register an event.  Signal events are handed to evsignal_add().  For
+ * read/write events the sets are grown (by doubling) when ev->ev_fd is
+ * above the highest fd seen so far, then the fd is added to the
+ * requested input sets and by-fd arrays.
+ *
+ * Returns 0 on success, -1 when the sets could not be grown.
+ */
+int
+select_add(void *arg, struct event *ev)
+{
+	struct selectop *sop = arg;
+
+	if (ev->ev_events & EV_SIGNAL)
+		return (evsignal_add(ev));
+
+	check_selectop(sop);
+	/*
+	 * Keep track of the highest fd, so that we can calculate the size
+	 * of the fd_sets for select(2)
+	 */
+	if (sop->event_fds < ev->ev_fd) {
+		int fdsz = sop->event_fdsz;
+
+		if (fdsz < sizeof(fd_mask))
+			fdsz = sizeof(fd_mask);
+
+		while (fdsz <
+		    (howmany(ev->ev_fd + 1, NFDBITS) * sizeof(fd_mask)))
+			fdsz *= 2;
+
+		if (fdsz != sop->event_fdsz) {
+			if (select_resize(sop, fdsz)) {
+				check_selectop(sop);
+				return (-1);
+			}
+		}
+
+		sop->event_fds = ev->ev_fd;
+	}
+
+	if (ev->ev_events & EV_READ) {
+		FD_SET(ev->ev_fd, sop->event_readset_in);
+		sop->event_r_by_fd[ev->ev_fd] = ev;
+	}
+	if (ev->ev_events & EV_WRITE) {
+		FD_SET(ev->ev_fd, sop->event_writeset_in);
+		sop->event_w_by_fd[ev->ev_fd] = ev;
+	}
+	check_selectop(sop);
+
+	return (0);
+}
+
+/*
+ * Unregister an event.  Signal events are handed to evsignal_del().  For
+ * read/write events the fd is removed from the requested input sets and
+ * its by-fd back pointers are cleared.  An fd above the highest one ever
+ * registered is ignored.  Always returns 0 for read/write events.
+ */
+
+int
+select_del(void *arg, struct event *ev)
+{
+	struct selectop *sop = arg;
+
+	check_selectop(sop);
+	if (ev->ev_events & EV_SIGNAL)
+		return (evsignal_del(ev));
+
+	if (sop->event_fds < ev->ev_fd) {
+		check_selectop(sop);
+		return (0);
+	}
+
+	if (ev->ev_events & EV_READ) {
+		FD_CLR(ev->ev_fd, sop->event_readset_in);
+		sop->event_r_by_fd[ev->ev_fd] = NULL;
+	}
+
+	if (ev->ev_events & EV_WRITE) {
+		FD_CLR(ev->ev_fd, sop->event_writeset_in);
+		sop->event_w_by_fd[ev->ev_fd] = NULL;
+	}
+
+	check_selectop(sop);
+	return (0);
+}
+
+/*
+ * Release every array owned by the backend state, then the state itself.
+ * Safe on a partially initialised state: free(NULL) is a no-op.
+ */
+void
+select_dealloc(void *arg)
+{
+	struct selectop *sop = arg;
+
+	free(sop->event_readset_in);
+	free(sop->event_writeset_in);
+	free(sop->event_readset_out);
+	free(sop->event_writeset_out);
+	free(sop->event_r_by_fd);
+	free(sop->event_w_by_fd);
+
+	memset(sop, 0, sizeof(struct selectop));
+	free(sop);
+}
diff --git a/contrib/pf/libevent/signal.c b/contrib/pf/libevent/signal.c
new file mode 100644
index 0000000..71bcffc
--- /dev/null
+++ b/contrib/pf/libevent/signal.c
@@ -0,0 +1,180 @@
+/* $OpenBSD: select.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2002 Niels Provos
<provos@citi.umich.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#else +#include <sys/_time.h> +#endif +#include <sys/queue.h> +#include <sys/socket.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +#include "event.h" +#include "evsignal.h" +#include "log.h" + +extern struct event_list signalqueue; + +static sig_atomic_t evsigcaught[NSIG]; +volatile sig_atomic_t evsignal_caught = 0; + +static struct event ev_signal; +static int ev_signal_pair[2]; +static int ev_signal_added; + +static void evsignal_handler(int sig); + +/* Callback for when the signal handler writes a byte to our signaling socket */ +static void +evsignal_cb(int fd, short what, void *arg) +{ + static char signals[100]; + struct event *ev = arg; + ssize_t n; + + n = read(fd, signals, sizeof(signals)); + if (n == -1) + event_err(1, "%s: read", __func__); + event_add(ev, NULL); +} + +#ifdef HAVE_SETFD +#define FD_CLOSEONEXEC(x) do { \ + if (fcntl(x, F_SETFD, 1) == -1) \ + event_warn("fcntl(%d, F_SETFD)", x); \ +} while (0) +#else +#define FD_CLOSEONEXEC(x) +#endif + +void +evsignal_init(void) +{ + /* + * Our signal handler is going to write to one end of the socket + * pair to wake up our event loop. The event loop then scans for + * signals that got delivered. 
+ */ + if (socketpair(AF_UNIX, SOCK_STREAM, 0, ev_signal_pair) == -1) + event_err(1, "%s: socketpair", __func__); + + FD_CLOSEONEXEC(ev_signal_pair[0]); + FD_CLOSEONEXEC(ev_signal_pair[1]); + + fcntl(ev_signal_pair[0], F_SETFL, O_NONBLOCK); + + event_set(&ev_signal, ev_signal_pair[1], EV_READ, + evsignal_cb, &ev_signal); + ev_signal.ev_flags |= EVLIST_INTERNAL; +} + +int +evsignal_add(struct event *ev) +{ + int evsignal; + struct sigaction sa; + + if (ev->ev_events & (EV_READ|EV_WRITE)) + event_errx(1, "%s: EV_SIGNAL incompatible use", __func__); + evsignal = EVENT_SIGNAL(ev); + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = evsignal_handler; + sigfillset(&sa.sa_mask); + sa.sa_flags |= SA_RESTART; + + if (sigaction(evsignal, &sa, NULL) == -1) + return (-1); + + if (!ev_signal_added) { + ev_signal_added = 1; + event_add(&ev_signal, NULL); + } + + return (0); +} + +/* + * Nothing to be done here. + */ + +int +evsignal_del(struct event *ev) +{ + int evsignal; + + evsignal = EVENT_SIGNAL(ev); + + return (sigaction(EVENT_SIGNAL(ev),(struct sigaction *)SIG_DFL, NULL)); +} + +static void +evsignal_handler(int sig) +{ + int save_errno = errno; + + evsigcaught[sig]++; + evsignal_caught = 1; + + /* Wake up our notification mechanism */ + write(ev_signal_pair[0], "a", 1); + errno = save_errno; +} + +void +evsignal_process(void) +{ + struct event *ev; + sig_atomic_t ncalls; + + evsignal_caught = 0; + TAILQ_FOREACH(ev, &signalqueue, ev_signal_next) { + ncalls = evsigcaught[EVENT_SIGNAL(ev)]; + if (ncalls) { + if (!(ev->ev_events & EV_PERSIST)) + event_del(ev); + event_active(ev, EV_SIGNAL, ncalls); + evsigcaught[EVENT_SIGNAL(ev)] = 0; + } + } +} + diff --git a/contrib/pf/man/pf.4 b/contrib/pf/man/pf.4 index a394627..1164202 100644 --- a/contrib/pf/man/pf.4 +++ b/contrib/pf/man/pf.4 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pf.4,v 1.54 2004/12/22 17:17:55 dhartmei Exp $ +.\" $OpenBSD: pf.4,v 1.58 2007/02/09 11:39:06 henning Exp $ .\" .\" Copyright (C) 2001, Kjell Wooding. 
All rights reserved. .\" @@ -184,6 +184,11 @@ using the obtained through a preceding .Dv DIOCGETRULES call. +If +.Va action +is set to +.Dv PF_GET_CLR_CNTR , +the per-rule statistics on the requested rule are cleared. .It Dv DIOCGETADDRS Fa "struct pfioc_pooladdr *pp" Get a .Va ticket @@ -346,6 +351,7 @@ struct pf_status { u_int32_t debug; u_int32_t hostid; char ifname[IFNAMSIZ]; + u_int8_t pf_chksum[MD5_DIGEST_LENGTH]; }; .Ed .It Dv DIOCCLRSTATUS @@ -389,19 +395,14 @@ struct pfioc_states { .Pp If .Va ps_len -is zero, all states will be gathered into -.Va pf_states -and +is non-zero on entry, as many states as possible that can fit into this +size will be copied into the supplied buffer +.Va ps_states . +On exit, .Va ps_len -will be set to the size they take in memory (i.e., +is always set to the total size required to hold all state table entries +(i.e., it is set to .Li sizeof(struct pf_state) * nr ) . -If -.Va ps_len -is non-zero, as many states that can fit into -.Va ps_len -as possible will be gathered, and -.Va ps_len -will be updated to the size those rules take in memory. .It Dv DIOCCHANGERULE Fa "struct pfioc_rule *pcr" Add or remove the .Va rule @@ -483,7 +484,8 @@ struct pfioc_limit { unsigned limit; }; -enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS }; +enum { PF_LIMIT_STATES, PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; .Ed .It Dv DIOCGETLIMIT Fa "struct pfioc_limit *pl" Get the hard @@ -521,10 +523,15 @@ struct pfioc_table { .It Dv DIOCRADDTABLES Fa "struct pfioc_table *io" Create one or more tables. On entry, -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_table -structures. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . On exit, .Va pfrio_nadd contains the number of tables effectively created. 
@@ -539,12 +546,17 @@ struct pfr_table { .It Dv DIOCRDELTABLES Fa "struct pfioc_table *io" Delete one or more tables. On entry, -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_table -structures. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . On exit, -.Va pfrio_nadd +.Va pfrio_ndel contains the number of tables effectively deleted. .It Dv DIOCRGETTABLES Fa "struct pfioc_table *io" Get the list of all tables. @@ -583,10 +595,15 @@ struct pfr_tstats { .It Dv DIOCRCLRTSTATS Fa "struct pfioc_table *io" Clear the statistics of one or more tables. On entry, -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_table -structures. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_table . On exit, .Va pfrio_nzero contains the number of tables effectively cleared. @@ -603,10 +620,15 @@ Add one or more addresses to a table. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains the list of -.Vt pfr_addr -structures to add. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to add to the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . On exit, .Va pfrio_nadd contains the number of addresses effectively added. @@ -629,10 +651,15 @@ Delete one or more addresses from a table. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains the list of -.Vt pfr_addr -structures to delete. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to delete from the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . On exit, .Va pfrio_ndel contains the number of addresses effectively deleted. 
@@ -643,10 +670,15 @@ This is the most complicated command, which uses all the structure members. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains the new list of -.Vt pfr_addr -structures. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements which become the new contents of the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . Additionally, if .Va pfrio_size2 is non-zero, @@ -701,10 +733,15 @@ Clear the statistics of one or more addresses. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_addr -structures to clear. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements to be cleared from the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . On exit, .Va pfrio_nzero contains the number of addresses effectively cleared. @@ -713,13 +750,18 @@ Test if the given addresses match a table. On entry, .Va pfrio_table contains the table ID and -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_addr -structures to test. +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_addr +containing at least +.Vt pfrio_size +elements, each of which will be tested for a match in the table. +.Vt pfrio_esize +must be the size of +.Vt struct pfr_addr . On exit, the kernel updates the .Vt pfr_addr -table by setting the +array by setting the .Va pfra_fback member appropriately. .It Dv DIOCRSETTFLAGS Fa "struct pfioc_table *io" @@ -729,14 +771,19 @@ or .Dv PFR_TFLAG_PERSIST flags of a table. On entry, -.Va pfrio_buffer[pfrio_size] -contains a table of -.Vt pfr_table -structures, and +.Va pfrio_buffer +must point to an array of +.Vt struct pfr_table +containing at least +.Vt pfrio_size +elements. +.Va pfrio_esize +must be the size of +.Vt struct pfr_table . 
.Va pfrio_setflag -contains the flags to add, while +must contain the flags to add, while .Va pfrio_clrflag -contains the flags to remove. +must contain the flags to remove. On exit, .Va pfrio_nchange and @@ -751,7 +798,7 @@ On entry, .Va pfrio_table contains the table ID and .Va pfrio_buffer[pfrio_size] -contains the list of +contains an array of .Vt pfr_addr structures to put in the table. A valid ticket must also be supplied to @@ -953,10 +1000,6 @@ struct pfioc_iface { int pfiio_nzero; int pfiio_flags; }; - -#define PFI_FLAG_GROUP 0x0001 /* gets groups of interfaces */ -#define PFI_FLAG_INSTANCE 0x0002 /* gets single interfaces */ -#define PFI_FLAG_ALLMASK 0x0003 .Ed .Pp If not empty, @@ -966,61 +1009,45 @@ can be used to restrict the search to a specific interface or driver. is the user-supplied buffer for returning the data. On entry, .Va pfiio_size -represents the number of -.Va pfi_if +contains the number of +.Vt pfi_kif entries that can fit into the buffer. The kernel will replace this value by the real number of entries it wants to return. .Va pfiio_esize should be set to -.Li sizeof(struct pfi_if) . -.Va pfiio_flags -should be set to -.Dv PFI_FLAG_GROUP , -.Dv PFI_FLAG_INSTANCE , -or both, to tell the kernel to return a group of interfaces -(drivers, like "fxp"), real interface instances (like "fxp1") or both. +.Li sizeof(struct pfi_kif) . 
+.Pp The data is returned in the -.Vt pfi_if +.Vt pfi_kif structure described below: .Bd -literal -struct pfi_if { - char pfif_name[IFNAMSIZ]; - u_int64_t pfif_packets[2][2][2]; - u_int64_t pfif_bytes[2][2][2]; - u_int64_t pfif_addcnt; - u_int64_t pfif_delcnt; - long pfif_tzero; - int pfif_states; - int pfif_rules; - int pfif_flags; +struct pfi_kif { + RB_ENTRY(pfi_kif) pfik_tree; + char pfik_name[IFNAMSIZ]; + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int32_t pfik_tzero; + int pfik_flags; + struct pf_state_tree_lan_ext pfik_lan_ext; + struct pf_state_tree_ext_gwy pfik_ext_gwy; + TAILQ_ENTRY(pfi_kif) pfik_w_states; + void *pfik_ah_cookie; + struct ifnet *pfik_ifp; + struct ifg_group *pfik_group; + int pfik_states; + int pfik_rules; + TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; }; - -#define PFI_IFLAG_GROUP 0x0001 /* group of interfaces */ -#define PFI_IFLAG_INSTANCE 0x0002 /* single instance */ -#define PFI_IFLAG_CLONABLE 0x0010 /* clonable group */ -#define PFI_IFLAG_DYNAMIC 0x0020 /* dynamic group */ -#define PFI_IFLAG_ATTACHED 0x0040 /* interface attached */ .Ed -.It Dv DIOCICLRISTATS Fa "struct pfioc_iface *io" -Clear the statistics counters of one or more interfaces. -.Va pfiio_name -and -.Va pfiio_flags -can be used to select which interfaces need to be cleared. -The filtering process is the same as for -.Dv DIOCIGETIFACES . -.Va pfiio_nzero -will be set by the kernel to the number of interfaces and drivers -that have been cleared. .It Dv DIOCSETIFFLAG Fa "struct pfioc_iface *io" -Set the user setable flags (described below) of the pf internal interface -description. +Set the user setable flags (described above) of the +.Nm +internal interface description. The filtering process is the same as for .Dv DIOCIGETIFACES . 
.Bd -literal -#define PFI_IFLAG_SKIP 0x0100 /* skip interface */ -#define PFI_IFLAG_SETABLE_MASK 0x0100 /* mask */ +#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ .Ed .It Dv DIOCCLRIFFLAG Fa "struct pfioc_iface *io" Works as diff --git a/contrib/pf/man/pf.conf.5 b/contrib/pf/man/pf.conf.5 index 817fa0b..bb210fc 100644 --- a/contrib/pf/man/pf.conf.5 +++ b/contrib/pf/man/pf.conf.5 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pf.conf.5,v 1.326 2005/03/01 18:10:44 jmc Exp $ +.\" $OpenBSD: pf.conf.5,v 1.376 2006/12/01 07:23:26 camield Exp $ .\" .\" Copyright (c) 2002, Daniel Hartmeier .\" All rights reserved. @@ -62,8 +62,7 @@ Queueing provides rule-based bandwidth control. Translation rules specify how addresses are to be mapped or redirected to other addresses. .It Cm Packet Filtering -Stateful and stateless packet filtering provides rule-based blocking or -passing of packets. +Packet filtering provides rule-based blocking or passing of packets. .El .Pp With the exception of @@ -80,11 +79,7 @@ enforces this order (see .Ar set require-order below). .Sh MACROS -Much like -.Xr cpp 1 -or -.Xr m4 1 , -macros can be defined that will later be expanded in context. +Macros can be defined that will later be expanded in context. Macro names must start with a letter, and may contain letters, digits and underscores. 
Macro names may not be reserved words (for example @@ -97,8 +92,8 @@ For example, .Bd -literal -offset indent ext_if = \&"kue0\&" all_ifs = \&"{\&" $ext_if lo0 \&"}\&" -pass out on $ext_if from any to any keep state -pass in on $ext_if proto tcp from any to any port 25 keep state +pass out on $ext_if from any to any +pass in on $ext_if proto tcp from any to any port 25 .Ed .Sh TABLES Tables are named structures which can hold a collection of addresses and @@ -181,9 +176,9 @@ when running with .Pp For example, .Bd -literal -offset indent -table <private> const { 10/8, 172.16/12, 192.168/16 } -table <badhosts> persist -block on fxp0 from { <private>, <badhosts> } to any +table \*(Ltprivate\*(Gt const { 10/8, 172.16/12, 192.168/16 } +table \*(Ltbadhosts\*(Gt persist +block on fxp0 from { \*(Ltprivate\*(Gt, \*(Ltbadhosts\*(Gt } to any .Ed .Pp creates a table called private, to hold RFC 1918 private network @@ -201,8 +196,8 @@ these hosts can be blocked by using A table can also be initialized with an address list specified in one or more external files, using the following syntax: .Bd -literal -offset indent -table <spam> persist file \&"/etc/spammers\&" file \&"/etc/openrelays\&" -block on fxp0 from <spam> to any +table \*(Ltspam\*(Gt persist file \&"/etc/spammers\&" file \&"/etc/openrelays\&" +block on fxp0 from \*(Ltspam\*(Gt to any .Ed .Pp The files @@ -217,7 +212,7 @@ When the resolver is called to add a hostname to a table, .Em all resulting IPv4 and IPv6 addresses are placed into the table. IP addresses can also be entered in a table by specifying a valid interface -name or the +name, a valid interface group or the .Em self keyword, in which case all addresses assigned to the interface(s) will be added to the table. @@ -310,7 +305,12 @@ This value is used to define the scale factor, it should not actually be reached (set a lower state limit, see below). .El .Pp -These values can be defined both globally and for each rule. 
+Adaptive timeouts are enabled by default, with an adaptive.start value +equal to 60% of the state limit, and an adaptive.end value equal to +120% of the state limit. +They can be disabled by setting both adaptive.start and adaptive.end to 0. +.Pp +The adaptive timeout values can be defined both globally and for each rule. When used on a per-rule basis, the values relate to the number of states created by the rule, otherwise to the total number of states. @@ -358,8 +358,10 @@ set limit states 20000 .Pp sets the maximum number of entries in the memory pool used by state table entries (generated by -.Ar keep state -rules) to 20000. +.Ar pass +rules which do not specify +.Ar no state ) +to 20000. Using .Bd -literal -offset indent set limit frags 20000 @@ -369,7 +371,7 @@ sets the maximum number of entries in the memory pool used for fragment reassembly (generated by .Ar scrub rules) to 20000. -Finally, +Using .Bd -literal -offset indent set limit src-nodes 2000 .Ed @@ -378,16 +380,63 @@ sets the maximum number of entries in the memory pool used for tracking source IP addresses (generated by the .Ar sticky-address and -.Ar source-track +.Ar src.track options) to 2000. +Using +.Bd -literal -offset indent +set limit tables 1000 +set limit table-entries 100000 +.Ed +.Pp +sets limits on the memory pools used by tables. +The first limits the number of tables that can exist to 1000. +The second limits the overall number of addresses that can be stored +in tables to 100000. .Pp -These can be combined: +Various limits can be combined on a single line: .Bd -literal -offset indent set limit { states 20000, frags 20000, src-nodes 2000 } .Ed .Pp +.It Ar set ruleset-optimization +.Bl -tag -width xxxxxxxx -compact +.It Ar none +Disable the ruleset optimizer. +This is the default behaviour. 
+.It Ar basic +Enable basic ruleset optimization, which does four things to improve the +performance of ruleset evaluations: +.Pp +.Bl -enum -compact +.It +remove duplicate rules +.It +remove rules that are a subset of another rule +.It +combine multiple rules into a table when advantageous +.It +re-order the rules to improve evaluation performance +.El +.Pp +.It Ar profile +Uses the currently loaded ruleset as a feedback profile to tailor the +ordering of quick rules to actual network traffic. +.El +.Pp +It is important to note that the ruleset optimizer will modify the ruleset +to improve performance. +A side effect of the ruleset modification is that per-rule accounting +statistics will have different meanings than before. +If per-rule accounting is important for billing purposes or whatnot, +either the ruleset optimizer should not be used or a label field should +be added to all of the accounting rules to act as optimization barriers. +.Pp +Optimization can also be set as a command-line argument to +.Xr pfctl 8 , +overriding the settings in +.Nm . .It Ar set optimization -Optimize the engine for one of the following network environments: +Optimize state timeouts for one of the following network environments: .Pp .Bl -tag -width xxxx -compact .It Ar normal @@ -442,8 +491,6 @@ option sets the default behaviour for states: .Bl -tag -width group-bound -compact .It Ar if-bound States are bound to interface. -.It Ar group-bound -States are bound to interface group (i.e. ppp) .It Ar floating States can match packets on any interfaces (the default). .El @@ -452,6 +499,21 @@ For example: .Bd -literal -offset indent set state-policy if-bound .Ed +.It Ar set hostid +The 32-bit +.Ar hostid +identifies this firewall's state table entries to other firewalls +in a +.Xr pfsync 4 +failover cluster. +By default the hostid is set to a pseudo-random value, however it may be +desirable to manually configure it, for example to more easily identify the +source of state table entries. 
+.Bd -literal -offset indent +set hostid 1 +.Ed +.Pp +The hostid may be specified in either decimal or hexadecimal. .It Ar set require-order By default .Xr pfctl 8 @@ -483,7 +545,7 @@ For example: .Pp .Dl set fingerprints \&"/etc/pf.os.devel\&" .Pp -.It Ar set skip on <ifspec> +.It Ar set skip on Aq Ar ifspec List interfaces for which packets should not be filtered. Packets passing in or out on such interfaces are passed as if pf was disabled, i.e. pf does not process them in any way. @@ -550,9 +612,9 @@ Using the modifier (see below) is recommended in combination with the .Ar no-df modifier to ensure unique IP identifiers. -.It Ar min-ttl <number> +.It Ar min-ttl Aq Ar number Enforces a minimum TTL for matching IP packets. -.It Ar max-mss <number> +.It Ar max-mss Aq Ar number Enforces a maximum MSS for matching TCP packets. .It Ar random-id Replaces the IP identification field with random values to compensate @@ -763,9 +825,9 @@ declaration. .Ar altq on has the following keywords: .Bl -tag -width xxxx -.It Ar <interface> +.It Aq Ar interface Queueing is enabled on the named interface. -.It Ar <scheduler> +.It Aq Ar scheduler Specifies which queueing scheduler to use. Currently supported values are @@ -775,7 +837,7 @@ for Class Based Queueing, for Priority Queueing and .Ar hfsc for the Hierarchical Fair Service Curve scheduler. -.It Ar bandwidth <bw> +.It Ar bandwidth Aq Ar bw The maximum bitrate for all queues on an interface may be specified using the .Ar bandwidth @@ -793,15 +855,17 @@ gigabits per second, respectively. The value must not exceed the interface bandwidth. If .Ar bandwidth -is not specified, the interface bandwidth is used. -.It Ar qlimit <limit> +is not specified, the interface bandwidth is used +(but take note that some interfaces do not know their bandwidth, +or can adapt their bandwidth rates). +.It Ar qlimit Aq Ar limit The maximum number of packets held in the queue. The default is 50. 
-.It Ar tbrsize <size> +.It Ar tbrsize Aq Ar size Adjusts the size, in bytes, of the token bucket regulator. If not specified, heuristics based on the interface bandwidth are used to determine the size. -.It Ar queue <list> +.It Ar queue Aq Ar list Defines a list of subqueues to create on an interface. .El .Pp @@ -830,10 +894,10 @@ in a parent declaration. The following keywords can be used: .Bl -tag -width xxxx -.It Ar on <interface> +.It Ar on Aq Ar interface Specifies the interface the queue operates on. If not given, it operates on all matching interfaces. -.It Ar bandwidth <bw> +.It Ar bandwidth Aq Ar bw Specifies the maximum bitrate to be processed by the queue. This value must not exceed the value of the parent .Ar queue @@ -843,7 +907,7 @@ If not specified, defaults to 100% of the parent queue's bandwidth. The .Ar priq scheduler does not support bandwidth specification. -.It Ar priority <level> +.It Ar priority Aq Ar level Between queues a priority level can be set. For .Ar cbq @@ -859,7 +923,7 @@ queues with a higher priority are always served first. and .Ar Hfsc queues with a higher priority are preferred in the case of overload. -.It Ar qlimit <limit> +.It Ar qlimit Aq Ar limit The maximum number of packets held in the queue. The default is 50. .El @@ -867,7 +931,9 @@ The default is 50. The .Ar scheduler can get additional parameters with -.Ar <scheduler> Ns Li (\& Ar <parameters> No ) . +.Xo Aq Ar scheduler +.Pf ( Aq Ar parameters ) . +.Xc Parameters are as follows: .Bl -tag -width Fl .It Ar default @@ -901,15 +967,16 @@ The .Ar scheduler supports some additional options: .Bl -tag -width Fl -.It Ar realtime <sc> +.It Ar realtime Aq Ar sc The minimum required bandwidth for the queue. -.It Ar upperlimit <sc> +.It Ar upperlimit Aq Ar sc The maximum allowed bandwidth for the queue. -.It Ar linkshare <sc> +.It Ar linkshare Aq Ar sc The bandwidth share of a backlogged queue. .El .Pp -<sc> is an acronym for +.Aq Ar sc +is an acronym for .Ar service curve . 
.Pp The format for service curve specifications is @@ -973,13 +1040,13 @@ queue ssh_bulk bandwidth 50% priority 0 cbq(borrow) block return out on dc0 inet all queue std pass out on dc0 inet proto tcp from $developerhosts to any port 80 \e - keep state queue developers + queue developers pass out on dc0 inet proto tcp from $employeehosts to any port 80 \e - keep state queue employees + queue employees pass out on dc0 inet proto tcp from any to any port 22 \e - keep state queue(ssh_bulk, ssh_interactive) + queue(ssh_bulk, ssh_interactive) pass out on dc0 inet proto tcp from any to any port 25 \e - keep state queue mail + queue mail .Ed .Sh TRANSLATION Translation rules modify either the source or destination address of the @@ -1039,9 +1106,9 @@ The packet is redirected to another destination and possibly a different port. .Ar rdr rules can optionally specify port ranges instead of single ports. -rdr ... port 2000:2999 -> ... port 4000 +rdr ... port 2000:2999 -\*(Gt ... port 4000 redirects ports 2000 to 2999 (inclusive) to port 4000. -rdr ... port 2000:2999 -> ... port 4000:* +rdr ... port 2000:2999 -\*(Gt ... port 4000:* redirects port 2000 to 4000, 2001 to 4001, ..., 2999 to 4999. .El .Pp @@ -1059,8 +1126,17 @@ Port numbers are never translated with a .Ar binat rule. .Pp -For each packet processed by the translator, the translation rules are -evaluated in sequential order, from first to last. +Evaluation order of the translation rules is dependent on the type +of the translation rules and of the direction of a packet. +.Ar binat +rules are always evaluated first. +Then either the +.Ar rdr +rules are evaluated on an inbound packet or the +.Ar nat +rules on an outbound packet. +Rules of the same type are evaluated in the same order in which they +appear in the ruleset. The first matching rule decides what action is taken. .Pp The @@ -1086,7 +1162,7 @@ or to the firewall itself. 
Note that redirecting external incoming connections to the loopback address, as in .Bd -literal -offset indent -rdr on ne3 inet proto tcp to port 8025 -> 127.0.0.1 port 25 +rdr on ne3 inet proto tcp to port spamd -\*(Gt 127.0.0.1 port smtp .Ed .Pp will effectively allow an external host to connect to daemons @@ -1122,6 +1198,8 @@ assigned to queues for the purpose of bandwidth control. For each packet processed by the packet filter, the filter rules are evaluated in sequential order, from first to last. The last matching rule decides what action is taken. +If no rule matches the packet, the default action is to pass +the packet. .Pp The following actions can be used in the filter: .Bl -tag -width xxxx @@ -1161,24 +1239,87 @@ Options returning ICMP packets currently have no effect if operates on a .Xr bridge 4 , as the code to support this feature has not yet been implemented. +.Pp +The simplest mechanism to block everything by default and only pass +packets that match explicit rules is to specify a first filter rule of: +.Bd -literal -offset indent +block all +.Ed .It Ar pass -The packet is passed. +The packet is passed; +state is created unless the +.Ar no state +option is specified. .El .Pp -If no rule matches the packet, the default action is -.Ar pass . +By default +.Xr pf 4 +filters packets statefully; the first time a packet matches a +.Ar pass +rule, a state entry is created; for subsequent packets the filter checks +whether the packet matches any state. +If it does, the packet is passed without evaluation of any rules. +After the connection is closed or times out, the state entry is automatically +removed. .Pp -To block everything by default and only pass packets -that match explicit rules, one uses +This has several advantages. +For TCP connections, comparing a packet to a state involves checking +its sequence numbers, as well as TCP timestamps if a +.Ar scrub reassemble tcp +rule applies to the connection. 
+If these values are outside the narrow windows of expected +values, the packet is dropped. +This prevents spoofing attacks, such as when an attacker sends packets with +a fake source address/port but does not know the connection's sequence +numbers. +Similarly, +.Xr pf 4 +knows how to match ICMP replies to states. +For example, .Bd -literal -offset indent -block all +pass out inet proto icmp all icmp-type echoreq .Ed .Pp -as the first filter rule. +allows echo requests (such as those created by +.Xr ping 8 ) +out statefully, and matches incoming echo replies correctly to states. +.Pp +Also, looking up states is usually faster than evaluating rules. +If there are 50 rules, all of them are evaluated sequentially in O(n). +Even with 50000 states, only 16 comparisons are needed to match a +state, since states are stored in a binary search tree that allows +searches in O(log2 n). +.Pp +Furthermore, correct handling of ICMP error messages is critical to +many protocols, particularly TCP. +.Xr pf 4 +matches ICMP error messages to the correct connection, checks them against +connection parameters, and passes them if appropriate. +For example if an ICMP source quench message referring to a stateful TCP +connection arrives, it will be matched to the state and get passed. .Pp +Finally, state tracking is required for +.Ar nat , binat No and Ar rdr +rules, in order to track address and port translations and reverse the +translation on returning packets. +.Pp +.Xr pf 4 +will also create state for other protocols which are effectively stateless by +nature. +UDP packets are matched to states using only host addresses and ports, +and other protocols are matched to states using only the host addresses. +.Pp +If stateless filtering of individual packets is desired, +the +.Ar no state +keyword can be used to specify that state will not be created +if this is the last matching rule. +A number of parameters can also be set to affect how +.Xr pf 4 +handles state tracking. 
See -.Sx FILTER EXAMPLES -below. +.Sx STATEFUL TRACKING OPTIONS +below for further details. .Sh PARAMETERS The rule parameters specify the packets to which a rule applies. A packet always comes in on, or goes out through, one interface. @@ -1198,22 +1339,14 @@ nor are specified, the rule will match packets in both directions. .It Ar log In addition to the action specified, a log message is generated. -All packets for that connection are logged, unless the -.Ar keep state , -.Ar modulate state -or -.Ar synproxy state -options are specified, in which case only the -packet that establishes the state is logged. -(See -.Ar keep state , -.Ar modulate state -and -.Ar synproxy state -below). -The logged packets are sent to the +Only the packet that establishes the state is logged, +unless the +.Ar no state +option is specified. +The logged packets are sent to a .Xr pflog 4 -interface. +interface, by default +.Ar pflog0 . This interface is monitored by the .Xr pflogd 8 logging daemon, which dumps the logged packets to the file @@ -1221,35 +1354,48 @@ logging daemon, which dumps the logged packets to the file in .Xr pcap 3 binary format. -.It Ar log-all -Used with -.Ar keep state , -.Ar modulate state -or -.Ar synproxy state -rules to force logging of all packets for a connection. +.It Ar log (all) +Used to force logging of all packets for a connection. +This is not necessary when +.Ar no state +is explicitly specified. As with .Ar log , packets are logged to .Xr pflog 4 . +.It Ar log (user) +Logs the +.Ux +user ID of the user that owns the socket and the PID of the process that +has the socket open where the packet is sourced from or destined to +(depending on which socket is local). +This is in addition to the normal information logged. +.It Ar log (to Aq Ar interface ) +Send logs to the specified +.Xr pflog 4 +interface instead of +.Ar pflog0 . 
.It Ar quick If a packet matches a rule which has the .Ar quick option set, this rule is considered the last matching rule, and evaluation of subsequent rules is skipped. -.It Ar on <interface> +.It Ar on Aq Ar interface This rule applies only to packets coming in on, or going out through, this -particular interface. -It is also possible to simply give the interface driver name, like ppp or fxp, -to make the rule match packets flowing through a group of interfaces. -.It Ar <af> +particular interface or interface group. +For more information on interface groups, +see the +.Ic group +keyword in +.Xr ifconfig 8 . +.It Aq Ar af This rule applies only to packets of this address family. Supported values are .Ar inet and .Ar inet6 . -.It Ar proto <protocol> +.It Ar proto Aq Ar protocol This rule applies only to packets of this protocol. Common protocols are .Xr icmp 4 , @@ -1262,8 +1408,11 @@ For a list of all the protocol name to number mappings used by see the file .Em /etc/protocols . .It Xo -.Ar from <source> port <source> os <source> -.Ar to <dest> port <dest> +.Ar from Aq Ar source +.Ar port Aq Ar source +.Ar os Aq Ar source +.Ar to Aq Ar dest +.Ar port Aq Ar dest .Xc This rule applies only to packets with the specified source and destination addresses and ports. @@ -1274,16 +1423,20 @@ symbolic host names or interface names, or as any of the following keywords: .Bl -tag -width xxxxxxxxxxxxxx -compact .It Ar any Any address. -.It Ar route <label> +.It Ar route Aq Ar label Any address whose associated route has label -.Ar <label> . +.Aq Ar label . See .Xr route 4 and .Xr route 8 . .It Ar no-route Any address which is not currently routable. -.It Ar <table> +.It Ar urpf-failed +Any source address that fails a unicast reverse path forwarding (URPF) +check, i.e. packets coming in on an interface other than that which holds +the route back to the packet's source address. +.It Aq Ar table Any address that matches the given table. 
.El .Pp @@ -1330,30 +1483,33 @@ Ports and ranges of ports are specified by using these operators: .Bd -literal -offset indent = (equal) != (unequal) -< (less than) -<= (less than or equal) -> (greater than) ->= (greater than or equal) +\*(Lt (less than) +\*(Le (less than or equal) +\*(Gt (greater than) +\*(Ge (greater than or equal) : (range including boundaries) ->< (range excluding boundaries) -<> (except range) +\*(Gt\*(Lt (range excluding boundaries) +\*(Lt\*(Gt (except range) .Ed .Pp -><, <> and : +.Sq \*(Gt\*(Lt , +.Sq \*(Lt\*(Gt +and +.Sq \&: are binary operators (they take two arguments). For instance: .Bl -tag -width Fl .It Ar port 2000:2004 means -.Sq all ports >= 2000 and <= 2004 , +.Sq all ports \*(Ge 2000 and \*(Le 2004 , hence ports 2000, 2001, 2002, 2003 and 2004. -.It Ar port 2000 >< 2004 +.It Ar port 2000 \*(Gt\*(Lt 2004 means -.Sq all ports > 2000 and < 2004 , +.Sq all ports \*(Gt 2000 and \*(Lt 2004 , hence ports 2001, 2002 and 2003. -.It Ar port 2000 <> 2004 +.It Ar port 2000 \*(Lt\*(Gt 2004 means -.Sq all ports < 2000 or > 2004 , +.Sq all ports \*(Lt 2000 or \*(Gt 2004 , hence ports 1-1999 and 2005-65535. .El .Pp @@ -1369,20 +1525,20 @@ The host, port and OS specifications are optional, as in the following examples: .Bd -literal -offset indent pass in all pass in from any to any -pass in proto tcp from any port <= 1024 to any +pass in proto tcp from any port \*(Le 1024 to any pass in proto tcp from any to any port 25 -pass in proto tcp from 10.0.0.0/8 port > 1024 \e +pass in proto tcp from 10.0.0.0/8 port \*(Gt 1024 \e to ! 10.1.2.3 port != ssh -pass in proto tcp from any os "OpenBSD" flags S/SA +pass in proto tcp from any os "OpenBSD" pass in proto tcp from route "DTAG" .Ed .It Ar all This is equivalent to "from any to any". -.It Ar group <group> +.It Ar group Aq Ar group Similar to .Ar user , this rule only applies to packets of sockets owned by the specified group. 
-.It Ar user <user> +.It Ar user Aq Ar user This rule only applies to packets of sockets owned by the specified user. For outgoing connections initiated from the firewall, this is the user that opened the connection. @@ -1415,7 +1571,7 @@ can only be used with the operators and .Cm != . Other constructs like -.Cm user >= unknown +.Cm user \*(Ge unknown are invalid. Forwarded packets with unknown user and group ID match only rules that explicitly compare against @@ -1425,29 +1581,37 @@ with the operators or .Cm != . For instance -.Cm user >= 0 +.Cm user \*(Ge 0 does not match forwarded packets. The following example allows only selected users to open outgoing connections: .Bd -literal -offset indent block out proto { tcp, udp } all -pass out proto { tcp, udp } all \e - user { < 1000, dhartmei } keep state +pass out proto { tcp, udp } all user { \*(Lt 1000, dhartmei } .Ed -.It Ar flags <a>/<b> | /<b> +.It Xo Ar flags Aq Ar a +.Pf / Ns Aq Ar b +.No \*(Ba / Ns Aq Ar b +.No \*(Ba any +.Xc This rule only applies to TCP packets that have the flags -.Ar <a> +.Aq Ar a set out of set -.Ar <b> . +.Aq Ar b . Flags not specified in -.Ar <b> +.Aq Ar b are ignored. +For stateful connections, the default is +.Ar flags S/SA . +To indicate that flags should not be checked at all, specify +.Ar flags any . The flags are: (F)IN, (S)YN, (R)ST, (P)USH, (A)CK, (U)RG, (E)CE, and C(W)R. .Bl -tag -width Fl .It Ar flags S/S Flag SYN is set. The other flags are ignored. .It Ar flags S/SA +This is the default setting for stateful connections. Out of SYN and ACK, exactly SYN may be set. SYN, SYN+PSH and SYN+RST match, but SYN+ACK, ACK and ACK+RST do not. This is more restrictive than the previous example. @@ -1455,8 +1619,38 @@ This is more restrictive than the previous example. If the first set is not specified, it defaults to none. All of SYN, FIN, RST and ACK must be unset. 
.El -.It Ar icmp-type <type> code <code> -.It Ar icmp6-type <type> code <code> +.Pp +Because +.Ar flags S/SA +is applied by default (unless +.Ar no state +is specified), only the initial SYN packet of a TCP handshake will create +a state for a TCP connection. +It is possible to be less restrictive, and allow state creation from +intermediate +.Pq non-SYN +packets, by specifying +.Ar flags any . +This will cause +.Xr pf 4 +to synchronize to existing connections, for instance +if one flushes the state table. +However, states created from such intermediate packets may be missing +connection details such as the TCP window scaling factor. +States which modify the packet flow, such as those affected by +.Ar nat , binat No or Ar rdr +rules, +.Ar modulate No or Ar synproxy state +options, or scrubbed with +.Ar reassemble tcp +will also not be recoverable from intermediate packets. +Such connections will stall and time out. +.It Xo Ar icmp-type Aq Ar type +.Ar code Aq Ar code +.Xc +.It Xo Ar icmp6-type Aq Ar type +.Ar code Aq Ar code +.Xc This rule only applies to ICMP or ICMPv6 packets with the specified type and code. Text names for ICMP types and codes are listed in @@ -1472,6 +1666,26 @@ or .Ar icmp6-type .Pc must match. +.It Xo Ar tos Aq Ar string +.No \*(Ba Aq Ar number +.Xc +This rule applies to packets with the specified +.Em TOS +bits set. +.Em TOS +may be +given as one of +.Ar lowdelay , +.Ar throughput , +.Ar reliability , +or as either hex or decimal. +.Pp +For example, the following rules are identical: +.Bd -literal -offset indent +pass all tos lowdelay +pass all tos 0x10 +pass all tos 16 +.Ed .It Ar allow-opts By default, packets which contain IP options are blocked. When @@ -1486,7 +1700,7 @@ The implicit .Ar pass rule that is used when a packet does not match any rules does not allow IP options. -.It Ar label <string> +.It Ar label Aq Ar string Adds a label (name) to the rule, which can be used to identify the rule. 
For instance, pfctl -s labels @@ -1515,24 +1729,27 @@ For example: .Bd -literal -offset indent ips = \&"{ 1.2.3.4, 1.2.3.5 }\&" pass in proto tcp from any to $ips \e - port > 1023 label \&"$dstaddr:$dstport\&" + port \*(Gt 1023 label \&"$dstaddr:$dstport\&" .Ed .Pp expands to .Bd -literal -offset indent pass in inet proto tcp from any to 1.2.3.4 \e - port > 1023 label \&"1.2.3.4:>1023\&" + port \*(Gt 1023 label \&"1.2.3.4:\*(Gt1023\&" pass in inet proto tcp from any to 1.2.3.5 \e - port > 1023 label \&"1.2.3.5:>1023\&" + port \*(Gt 1023 label \&"1.2.3.5:\*(Gt1023\&" .Ed .Pp The macro expansion for the .Ar label directive occurs only at configuration file parse time, not during runtime. -.It Ar queue <queue> | ( <queue> , <queue> ) +.It Xo Ar queue Aq Ar queue +.No \*(Ba ( Aq Ar queue , +.Aq Ar queue ) +.Xc Packets matching this rule will be assigned to the specified queue. If two queues are given, packets which have a -.Em tos +.Em TOS of .Em lowdelay and TCP ACKs with no data payload will be assigned to the second one. @@ -1545,7 +1762,7 @@ For example: pass in proto tcp to port 25 queue mail pass in proto tcp to port 22 queue(ssh_bulk, ssh_prio) .Ed -.It Ar tag <string> +.It Ar tag Aq Ar string Packets matching this rule will be tagged with the specified string. The tag acts as an internal marker that can be used to @@ -1560,14 +1777,6 @@ is not the last matching rule. Further matching rules can replace the tag with a new one but will not remove a previously applied tag. A packet is only ever assigned one tag at a time. -.Ar pass -rules that use the -.Ar tag -keyword must also use -.Ar keep state , -.Ar modulate state -or -.Ar synproxy state . Packet tagging can be done during .Ar nat , .Ar rdr , @@ -1575,7 +1784,7 @@ or .Ar binat rules in addition to filter rules. Tags take the same macros as labels (see above). 
-.It Ar tagged <string> +.It Ar tagged Aq Ar string Used with filter or translation rules to specify that packets must already be tagged with the given tag in order to match the rule. Inverse tag matching can also be done @@ -1584,7 +1793,10 @@ by specifying the operator before the .Ar tagged keyword. -.It Ar probability <number> +.It Ar rtable Aq Ar number +Used to select an alternate routing table for the routing lookup. +Only effective before the route lookup happened, i.e. when filtering inbound. +.It Ar probability Aq Ar number A probability attribute can be attached to a rule, with a value set between 0 and 1, bounds not included. In that case, the rule will be honoured using the given probability value @@ -1706,124 +1918,6 @@ beyond the lifetime of the states, increase the global options with See .Sx STATEFUL TRACKING OPTIONS for more ways to control the source tracking. -.Sh STATEFUL INSPECTION -.Xr pf 4 -is a stateful packet filter, which means it can track the state of -a connection. -Instead of passing all traffic to port 25, for instance, it is possible -to pass only the initial packet, and then begin to keep state. -Subsequent traffic will flow because the filter is aware of the connection. -.Pp -If a packet matches a -.Ar pass ... keep state -rule, the filter creates a state for this connection and automatically -lets pass all subsequent packets of that connection. -.Pp -Before any rules are evaluated, the filter checks whether the packet -matches any state. -If it does, the packet is passed without evaluation of any rules. -.Pp -States are removed after the connection is closed or has timed out. -.Pp -This has several advantages. -Comparing a packet to a state involves checking its sequence numbers. -If the sequence numbers are outside the narrow windows of expected -values, the packet is dropped. 
-This prevents spoofing attacks, such as when an attacker sends packets with -a fake source address/port but does not know the connection's sequence -numbers. -.Pp -Also, looking up states is usually faster than evaluating rules. -If there are 50 rules, all of them are evaluated sequentially in O(n). -Even with 50000 states, only 16 comparisons are needed to match a -state, since states are stored in a binary search tree that allows -searches in O(log2 n). -.Pp -For instance: -.Bd -literal -offset indent -block all -pass out proto tcp from any to any flags S/SA keep state -pass in proto tcp from any to any port 25 flags S/SA keep state -.Ed -.Pp -This ruleset blocks everything by default. -Only outgoing connections and incoming connections to port 25 are allowed. -The initial packet of each connection has the SYN -flag set, will be passed and creates state. -All further packets of these connections are passed if they match a state. -.Pp -By default, packets coming in and out of any interface can match a state, -but it is also possible to change that behaviour by assigning states to a -single interface or a group of interfaces. -.Pp -The default policy is specified by the -.Ar state-policy -global option, but this can be adjusted on a per-rule basis by adding one -of the -.Ar if-bound , -.Ar group-bound -or -.Ar floating -keywords to the -.Ar keep state -option. -For example, if a rule is defined as: -.Bd -literal -offset indent -pass out on ppp from any to 10.12/16 keep state (group-bound) -.Ed -.Pp -A state created on ppp0 would match packets an all PPP interfaces, -but not packets flowing through fxp0 or any other interface. -.Pp -Keeping rules -.Ar floating -is the more flexible option when the firewall is in a dynamic routing -environment. -However, this has some security implications since a state created by one -trusted network could allow potentially hostile packets coming in from other -interfaces. 
-.Pp -Specifying -.Ar flags S/SA -restricts state creation to the initial SYN -packet of the TCP handshake. -One can also be less restrictive, and allow state creation from -intermediate -.Pq non-SYN -packets. -This will cause -.Xr pf 4 -to synchronize to existing connections, for instance -if one flushes the state table. -.Pp -For UDP, which is stateless by nature, -.Ar keep state -will create state as well. -UDP packets are matched to states using only host addresses and ports. -.Pp -ICMP messages fall into two categories: ICMP error messages, which always -refer to a TCP or UDP packet, are matched against the referred to connection. -If one keeps state on a TCP connection, and an ICMP source quench message -referring to this TCP connection arrives, it will be matched to the right -state and get passed. -.Pp -For ICMP queries, -.Ar keep state -creates an ICMP state, and -.Xr pf 4 -knows how to match ICMP replies to states. -For example, -.Bd -literal -offset indent -pass out inet proto icmp all icmp-type echoreq keep state -.Ed -.Pp -allows echo requests (such as those created by -.Xr ping 8 ) -out, creates state, and matches incoming echo replies correctly to states. -.Pp -Note: -.Ar nat , binat No and Ar rdr -rules implicitly create state for connections. .Sh STATE MODULATION Much of the security derived from TCP is attributable to how well the initial sequence numbers (ISNs) are chosen. @@ -1846,25 +1940,10 @@ For instance: .Bd -literal -offset indent block all pass out proto tcp from any to any modulate state -pass in proto tcp from any to any port 25 flags S/SA modulate state +pass in proto tcp from any to any port 25 flags S/SFRA modulate state .Ed .Pp -There are two caveats associated with state modulation: -A -.Ar modulate state -rule can not be applied to a pre-existing but unmodulated connection. -Such an application would desynchronize TCP's strict -sequencing between the two endpoints. 
-Instead, -.Xr pf 4 -will treat the -.Ar modulate state -modifier as a -.Ar keep state -modifier and the pre-existing connection will be inferred without -the protection conferred by modulation. -.Pp -The other caveat affects currently modulated states when the state table +Note that modulated connections will not recover when the state table is lost (firewall reboot, flushing the state table, etc...). .Xr pf 4 will not be able to infer a connection again after the state table flushes @@ -1873,11 +1952,20 @@ When the state is lost, the connection may be left dangling until the respective endpoints time out the connection. It is possible on a fast local network for the endpoints to start an ACK storm while trying to resynchronize after the loss of the modulator. -Using a -.Ar flags S/SA -modifier on +The default +.Ar flags +settings (or a more strict equivalent) should be used on .Ar modulate state -rules between fast networks is suggested to prevent ACK storms. +rules to prevent ACK storms. +.Pp +Note that alternative methods are available +to prevent loss of the state table +and allow for firewall failover. +See +.Xr carp 4 +and +.Xr pfsync 4 +for further information. .Sh SYN PROXY By default, .Xr pf 4 @@ -1903,12 +1991,9 @@ chooses random initial sequence numbers for both handshakes. Once the handshakes are completed, the sequence number modulators (see previous section) are used to translate further packets of the connection. -Hence, .Ar synproxy state includes -.Ar modulate state -and -.Ar keep state . +.Ar modulate state . .Pp Rules with .Ar synproxy @@ -1919,18 +2004,21 @@ operates on a .Pp Example: .Bd -literal -offset indent -pass in proto tcp from any to any port www flags S/SA synproxy state +pass in proto tcp from any to any port www synproxy state .Ed .Sh STATEFUL TRACKING OPTIONS -All three of +A number of options related to stateful tracking can be applied on a +per-rule basis. 
.Ar keep state , .Ar modulate state and .Ar synproxy state -support the following options: +support these options, and +.Ar keep state +must be specified explicitly to apply options to a rule. .Pp .Bl -tag -width xxxx -compact -.It Ar max <number> +.It Ar max Aq Ar number Limits the number of concurrent states the rule may create. When this limit is reached, further packets matching the rule that would create state are dropped, until existing states time out. @@ -1938,7 +2026,9 @@ create state are dropped, until existing states time out. Prevent state changes for states created by this rule from appearing on the .Xr pfsync 4 interface. -.It Ar <timeout> <seconds> +.It Xo Aq Ar timeout +.Aq Ar seconds +.Xc Changes the timeout values used for states created by this rule. For a list of all valid timeout names, see .Sx OPTIONS @@ -1948,7 +2038,7 @@ above. Multiple options can be specified, separated by commas: .Bd -literal -offset indent pass in proto tcp from any to any \e - port www flags S/SA keep state \e + port www keep state \e (max 100, source-track rule, max-src-nodes 75, \e max-src-states 3, tcp.established 60, tcp.closing 5) .Ed @@ -1962,7 +2052,7 @@ keyword is specified, the number of states per source IP is tracked. The maximum number of states created by this rule is limited by the rule's .Ar max-src-nodes and -.Ar max-src-state +.Ar max-src-states options. Only state entries created by this particular rule count toward the rule's limits. @@ -1979,10 +2069,10 @@ each individual rule's limits. The following limits can be set: .Pp .Bl -tag -width xxxx -compact -.It Ar max-src-nodes <number> +.It Ar max-src-nodes Aq Ar number Limits the maximum number of source addresses which can simultaneously have state table entries. -.It Ar max-src-states <number> +.It Ar max-src-states Aq Ar number Limits the maximum number of simultaneous state entries that a single source address can create with this rule. 
.El @@ -1992,10 +2082,12 @@ which have completed the TCP 3-way handshake) can also be enforced per source IP. .Pp .Bl -tag -width xxxx -compact -.It Ar max-src-conn <number> +.It Ar max-src-conn Aq Ar number Limits the maximum number of simultaneous TCP connections which have completed the 3-way handshake that a single host can make. -.It Ar max-src-conn-rate <number> / <seconds> +.It Xo Ar max-src-conn-rate Aq Ar number +.No / Aq Ar seconds +.Xc Limit the rate of new connections over a time interval. The connection rate is an approximation calculated as a moving average. .El @@ -2003,7 +2095,7 @@ The connection rate is an approximation calculated as a moving average. Because the 3-way handshake ensures that the source address is not being spoofed, more aggressive action can be taken based on these limits. With the -.Ar overload <table> +.Ar overload Aq Ar table state option, source IP addresses which hit either of the limits on established connections will be added to the named table. This table can be used in the ruleset to block further activity from @@ -2022,13 +2114,15 @@ offending host, regardless of which rule created the state. For example, the following rules will protect the webserver against hosts making more than 100 connections in 10 seconds. Any host which connects faster than this rate will have its address added -to the <bad_hosts> table and have all states originating from it flushed. +to the +.Aq bad_hosts +table and have all states originating from it flushed. Any new packets arriving from this host will be dropped unconditionally by the block rule. 
.Bd -literal -offset indent -block quick from <bad_hosts> -pass in on $ext_if proto tcp to $webserver port www flags S/SA keep state \e - (max-src-conn-rate 100/10, overload <bad_hosts> flush global) +block quick from \*(Ltbad_hosts\*(Gt +pass in on $ext_if proto tcp to $webserver port www keep state \e + (max-src-conn-rate 100/10, overload \*(Ltbad_hosts\*(Gt flush global) .Ed .Sh OPERATING SYSTEM FINGERPRINTING Passive OS Fingerprinting is a mechanism to inspect nuances of a TCP @@ -2041,17 +2135,23 @@ upon. The fingerprints may be specified by operating system class, by version, or by subtype/patchlevel. The class of an operating system is typically the vendor or genre -and would be OpenBSD for the +and would be +.Ox +for the .Xr pf 4 firewall itself. -The version of the oldest available OpenBSD release on the main ftp site +The version of the oldest available +.Ox +release on the main FTP site would be 2.6 and the fingerprint would be written .Pp .Dl \&"OpenBSD 2.6\&" .Pp The subtype of an operating system is typically used to describe the patchlevel if that patch led to changes in the TCP stack behavior. -In the case of OpenBSD, the only subtype is for a fingerprint that was +In the case of +.Ox , +the only subtype is for a fingerprint that was normalized by the .Ar no-df scrub option and would be specified as @@ -2079,12 +2179,12 @@ which no operating system fingerprint is known. .Pp Examples: .Bd -literal -offset indent -pass out proto tcp from any os OpenBSD keep state +pass out proto tcp from any os OpenBSD block out proto tcp from any os Doors block out proto tcp from any os "Doors PT" block out proto tcp from any os "Doors PT SP3" block out from any os "unknown" -pass on lo0 proto tcp from any os "OpenBSD 3.3 lo0" keep state +pass on lo0 proto tcp from any os "OpenBSD 3.3 lo0" .Ed .Pp Operating system fingerprinting is limited only to the TCP SYN packet. 
@@ -2233,25 +2333,28 @@ attachment point using the following kinds of rules: .Bl -tag -width xxxx -.It Ar nat-anchor <name> +.It Ar nat-anchor Aq Ar name Evaluates the .Ar nat rules in the specified .Ar anchor . -.It Ar rdr-anchor <name> +.It Ar rdr-anchor Aq Ar name Evaluates the .Ar rdr rules in the specified .Ar anchor . -.It Ar binat-anchor <name> +.It Ar binat-anchor Aq Ar name Evaluates the .Ar binat rules in the specified .Ar anchor . -.It Ar anchor <name> +.It Ar anchor Aq Ar name Evaluates the filter rules in the specified .Ar anchor . -.It Ar load anchor <name> from <file> +.It Xo Ar load anchor +.Aq Ar name +.Ar from Aq Ar file +.Xc Loads the rules from the specified file into the anchor .Ar name . @@ -2263,11 +2366,17 @@ rule, .Xr pf 4 will proceed to evaluate all rules specified in that anchor. .Pp -Matching filter and translation rules in anchors with the +Matching filter and translation rules marked with the .Ar quick option are final and abort the evaluation of the rules in other -anchors -and the main ruleset. +anchors and the main ruleset. +If the +.Ar anchor +itself is marked with the +.Ar quick +option, +ruleset evaluation will terminate when the anchor is exited if the packet is +matched by any rule within the anchor. .Pp .Ar anchor rules are evaluated relative to the anchor in which they are contained. 
@@ -2291,9 +2400,9 @@ For example, ext_if = \&"kue0\&" block on $ext_if all anchor spam -pass out on $ext_if all keep state +pass out on $ext_if all pass in on $ext_if proto tcp from any \e - to $ext_if port smtp keep state + to $ext_if port smtp .Ed .Pp blocks all packets on the external interface by default, then evaluates @@ -2341,8 +2450,8 @@ This allows conditional evaluation of anchors, like: .Bd -literal -offset indent block on $ext_if all anchor spam proto tcp from any to any port smtp -pass out on $ext_if all keep state -pass in on $ext_if proto tcp from any to $ext_if port smtp keep state +pass out on $ext_if all +pass in on $ext_if proto tcp from any to $ext_if port smtp .Ed .Pp The rules inside @@ -2397,6 +2506,22 @@ anchor, if any, before finally evaluating the .Ar pass rule. .Pp +Filter rule +.Ar anchors +can also be loaded inline in the ruleset within a brace ('{' '}') delimited +block. +Brace delimited blocks may contain rules or other brace-delimited blocks. +When anchors are loaded this way the anchor name becomes optional. +.Bd -literal -offset indent +anchor "external" on egress { + block + anchor out { + pass proto tcp from any to port { 25, 80, 443 } + } + pass in proto tcp to any port 22 +} +.Ed +.Pp Since the parser specification for anchor names is a string, any reference to an anchor name containing solidus .Pq Sq / @@ -2412,7 +2537,7 @@ and therefore lacks permission to bind to port 80). 
ext_if = \&"ne3\&" # map daemon on 8080 to appear to be on 80 -rdr on $ext_if proto tcp from any to any port 80 -> 127.0.0.1 port 8080 +rdr on $ext_if proto tcp from any to any port 80 -\*(Gt 127.0.0.1 port 8080 .Ed .Pp If the @@ -2420,7 +2545,7 @@ If the modifier is given, packets matching the translation rule are passed without inspecting the filter rules: .Bd -literal -rdr pass on $ext_if proto tcp from any to any port 80 -> 127.0.0.1 \e +rdr pass on $ext_if proto tcp from any to any port 80 -\*(Gt 127.0.0.1 \e port 8080 .Ed .Pp @@ -2433,7 +2558,7 @@ network appear as though it is the Internet routable address for the nodes on vlan12. (Thus, 192.168.168.1 can talk to the 192.168.168.0/24 nodes.) .Bd -literal -nat on ! vlan12 from 192.168.168.0/24 to any -> 204.92.77.111 +nat on ! vlan12 from 192.168.168.0/24 to any -\*(Gt 204.92.77.111 .Ed .Pp In the example below, the machine sits between a fake internal 144.19.74.* @@ -2444,7 +2569,7 @@ rule excludes protocol AH from being translated. .Bd -literal # NO NAT no nat on $ext_if proto ah from 144.19.74.0/24 to any -nat on $ext_if from 144.19.74.0/24 to any -> 204.92.77.100 +nat on $ext_if from 144.19.74.0/24 to any -\*(Gt 204.92.77.100 .Ed .Pp In the example below, packets bound for one specific server, as well as those @@ -2453,46 +2578,51 @@ generated by the sysadmins are not proxied; all other connections are. # NO RDR no rdr on $int_if proto { tcp, udp } from any to $server port 80 no rdr on $int_if proto { tcp, udp } from $sysadmins to any port 80 -rdr on $int_if proto { tcp, udp } from any to any port 80 -> 127.0.0.1 \e +rdr on $int_if proto { tcp, udp } from any to any port 80 -\*(Gt 127.0.0.1 \e port 80 .Ed .Pp This longer example uses both a NAT and a redirection. The external interface has the address 157.161.48.183. -On the internal interface, we are running +On localhost, we are running .Xr ftp-proxy 8 , -listening for outbound ftp sessions captured to port 8021. 
+waiting for FTP sessions to be redirected to it. +The three mandatory anchors for +.Xr ftp-proxy 8 +are omitted from this example; see the +.Xr ftp-proxy 8 +manpage. .Bd -literal # NAT # Translate outgoing packets' source addresses (any protocol). # In this case, any address but the gateway's external address is mapped. -nat on $ext_if inet from ! ($ext_if) to any -> ($ext_if) +nat on $ext_if inet from ! ($ext_if) to any -\*(Gt ($ext_if) # NAT PROXYING # Map outgoing packets' source port to an assigned proxy port instead of # an arbitrary port. # In this case, proxy outgoing isakmp with port 500 on the gateway. -nat on $ext_if inet proto udp from any port = isakmp to any -> ($ext_if) \e +nat on $ext_if inet proto udp from any port = isakmp to any -\*(Gt ($ext_if) \e port 500 # BINAT # Translate outgoing packets' source address (any protocol). # Translate incoming packets' destination address to an internal machine # (bidirectional). -binat on $ext_if from 10.1.2.150 to any -> $ext_if +binat on $ext_if from 10.1.2.150 to any -\*(Gt $ext_if # RDR # Translate incoming packets' destination addresses. # As an example, redirect a TCP and UDP port to an internal machine. rdr on $ext_if inet proto tcp from any to ($ext_if) port 8080 \e - -> 10.1.2.151 port 22 + -\*(Gt 10.1.2.151 port 22 rdr on $ext_if inet proto udp from any to ($ext_if) port 8080 \e - -> 10.1.2.151 port 53 + -\*(Gt 10.1.2.151 port 53 # RDR # Translate outgoing ftp control connections to send them to localhost # for proxying with ftp-proxy(8) running on port 8021. -rdr on $int_if proto tcp from any to any port 21 -> 127.0.0.1 port 8021 +rdr on $int_if proto tcp from any to any port 21 -\*(Gt 127.0.0.1 port 8021 .Ed .Pp In this example, a NAT gateway is set up to translate internal addresses @@ -2504,13 +2634,13 @@ network. # Translate outgoing packets' source addresses using an address pool. # A given source address is always translated to the same pool address by # using the source-hash keyword. 
-nat on $ext_if inet from any to any -> 192.0.2.16/28 source-hash +nat on $ext_if inet from any to any -\*(Gt 192.0.2.16/28 source-hash # RDR ROUND ROBIN # Translate incoming web server connections to a group of web servers on # the internal network. rdr on $ext_if proto tcp from any to any port 80 \e - -> { 10.1.2.155, 10.1.2.160, 10.1.2.161 } round-robin + -\*(Gt { 10.1.2.155, 10.1.2.160, 10.1.2.161 } round-robin .Ed .Sh FILTER EXAMPLES .Bd -literal @@ -2530,6 +2660,10 @@ block return log on $ext_if all # block anything coming from source we have no back routes for block in from no-route to any +# block packets whose ingress interface does not match the one in +# the route back to their source address +block in from urpf-failed to any + # block and log outgoing packets that do not have our address as source, # they are either spoofed or something is misconfigured (NAT disabled, # for instance), we want to be nice and do not send out garbage. @@ -2551,15 +2685,15 @@ block in log quick on $ext_if from { 10.0.0.0/8, 172.16.0.0/12, \e # so replies (like 0/0 for 8/0) will match queries # ICMP error messages (which always refer to a TCP/UDP packet) are # handled by the TCP/UDP states -pass on $ext_if inet proto icmp all icmp-type 8 code 0 keep state +pass on $ext_if inet proto icmp all icmp-type 8 code 0 # UDP # pass out all UDP connections and keep state -pass out on $ext_if proto udp all keep state +pass out on $ext_if proto udp all # pass in certain UDP connections and keep state (DNS) -pass in on $ext_if proto udp from any to any port domain keep state +pass in on $ext_if proto udp from any to any port domain # TCP @@ -2568,18 +2702,19 @@ pass out on $ext_if proto tcp all modulate state # pass in certain TCP connections and keep state (SSH, SMTP, DNS, IDENT) pass in on $ext_if proto tcp from any to any port { ssh, smtp, domain, \e - auth } flags S/SA keep state - -# pass in data mode connections for ftp-proxy running on this host. 
-# (see ftp-proxy(8) for details) -pass in on $ext_if proto tcp from any to 157.161.48.183 port >= 49152 \e - flags S/SA keep state + auth } # Do not allow Windows 9x SMTP connections since they are typically # a viral worm. Alternately we could limit these OSes to 1 connection each. block in on $ext_if proto tcp from any os {"Windows 95", "Windows 98"} \e to any port smtp +# IPv6 +# pass in/out all IPv6 traffic: note that we have to enable this in two +# different ways, on both our physical interface and our tunnel +pass quick on gif0 inet6 +pass quick on $ext_if proto ipv6 + # Packet Tagging # three interfaces: $int_if, $ext_if, and $wifi_if (wireless). NAT is @@ -2588,21 +2723,21 @@ block in on $ext_if proto tcp from any os {"Windows 95", "Windows 98"} \e # outgoing packets (i.e., packets from the wireless network) are only # permitted to access port 80. -pass in on $int_if from any to any tag INTNET keep state -pass in on $wifi_if from any to any keep state +pass in on $int_if from any to any tag INTNET +pass in on $wifi_if from any to any block out on $ext_if from any to any -pass out quick on $ext_if tagged INTNET keep state -pass out on $ext_if proto tcp from any to any port 80 keep state +pass out quick on $ext_if tagged INTNET +pass out on $ext_if proto tcp from any to any port 80 # tag incoming packets as they are redirected to spamd(8). use the tag # to pass those packets through the packet filter. 
-rdr on $ext_if inet proto tcp from <spammers> to port smtp \e - tag SPAMD -> 127.0.0.1 port spamd +rdr on $ext_if inet proto tcp from \*(Ltspammers\*(Gt to port smtp \e + tag SPAMD -\*(Gt 127.0.0.1 port spamd block in on $ext_if -pass in on $ext_if inet proto tcp tagged SPAMD keep state +pass in on $ext_if inet proto tcp tagged SPAMD .Ed .Sh GRAMMAR Syntax for @@ -2610,59 +2745,66 @@ Syntax for in BNF: .Bd -literal line = ( option | pf-rule | nat-rule | binat-rule | rdr-rule | - antispoof-rule | altq-rule | queue-rule | anchor-rule | - trans-anchors | load-anchors | table-rule ) + antispoof-rule | altq-rule | queue-rule | trans-anchors | + anchor-rule | anchor-close | load-anchor | table-rule | ) option = "set" ( [ "timeout" ( timeout | "{" timeout-list "}" ) ] | + [ "ruleset-optimization" [ "none" | "basic" | "profile" ]] | [ "optimization" [ "default" | "normal" | "high-latency" | "satellite" | "aggressive" | "conservative" ] ] [ "limit" ( limit-item | "{" limit-list "}" ) ] | [ "loginterface" ( interface-name | "none" ) ] | [ "block-policy" ( "drop" | "return" ) ] | - [ "state-policy" ( "if-bound" | "group-bound" | - "floating" ) ] + [ "state-policy" ( "if-bound" | "floating" ) ] [ "require-order" ( "yes" | "no" ) ] [ "fingerprints" filename ] | + [ "skip on" ( interface-name | "{" interface-list "}" ) ] | [ "debug" ( "none" | "urgent" | "misc" | "loud" ) ] ) pf-rule = action [ ( "in" | "out" ) ] - [ "log" | "log-all" ] [ "quick" ] - [ "on" ifspec ] [ route ] [ af ] [ protospec ] + [ "log" [ "(" logopts ")"] ] [ "quick" ] + [ "on" ifspec ] [ "fastroute" | route ] [ af ] [ protospec ] hosts [ filteropt-list ] +logopts = logopt [ "," logopts ] +logopt = "all" | "user" | "to" interface-name + filteropt-list = filteropt-list filteropt | filteropt filteropt = user | group | flags | icmp-type | icmp6-type | tos | - ( "keep" | "modulate" | "synproxy" ) "state" + ( "no" | "keep" | "modulate" | "synproxy" ) "state" [ "(" state-opts ")" ] | "fragment" | "no-df" | 
"min-ttl" number | "max-mss" number | "random-id" | "reassemble tcp" | fragmentation | "allow-opts" | - "label" string | "tag" string | [ ! ] "tagged" string + "label" string | "tag" string | [ ! ] "tagged" string | "queue" ( string | "(" string [ [ "," ] string ] ")" ) | - "probability" number"%" + "rtable" number | "probability" number"%" -nat-rule = [ "no" ] "nat" [ "pass" ] [ "on" ifspec ] [ af ] +nat-rule = [ "no" ] "nat" [ "pass" [ "log" [ "(" logopts ")" ] ] ] + [ "on" ifspec ] [ af ] [ protospec ] hosts [ "tag" string ] [ "tagged" string ] - [ "->" ( redirhost | "{" redirhost-list "}" ) + [ "-\*(Gt" ( redirhost | "{" redirhost-list "}" ) [ portspec ] [ pooltype ] [ "static-port" ] ] -binat-rule = [ "no" ] "binat" [ "pass" ] [ "on" interface-name ] - [ af ] [ "proto" ( proto-name | proto-number ) ] +binat-rule = [ "no" ] "binat" [ "pass" [ "log" [ "(" logopts ")" ] ] ] + [ "on" interface-name ] [ af ] + [ "proto" ( proto-name | proto-number ) ] "from" address [ "/" mask-bits ] "to" ipspec [ "tag" string ] [ "tagged" string ] - [ "->" address [ "/" mask-bits ] ] + [ "-\*(Gt" address [ "/" mask-bits ] ] -rdr-rule = [ "no" ] "rdr" [ "pass" ] [ "on" ifspec ] [ af ] +rdr-rule = [ "no" ] "rdr" [ "pass" [ "log" [ "(" logopts ")" ] ] ] + [ "on" ifspec ] [ af ] [ protospec ] hosts [ "tag" string ] [ "tagged" string ] - [ "->" ( redirhost | "{" redirhost-list "}" ) + [ "-\*(Gt" ( redirhost | "{" redirhost-list "}" ) [ portspec ] [ pooltype ] ] antispoof-rule = "antispoof" [ "log" ] [ "quick" ] "for" ( interface-name | "{" interface-list "}" ) [ af ] [ "label" string ] -table-rule = "table" "<" string ">" [ tableopts-list ] +table-rule = "table" "\*(Lt" string "\*(Gt" [ tableopts-list ] tableopts-list = tableopts-list tableopts | tableopts tableopts = "persist" | "const" | "file" string | "{" [ tableaddr-list ] "}" @@ -2676,8 +2818,10 @@ altq-rule = "altq on" interface-name queueopts-list queue-rule = "queue" string [ "on" interface-name ] queueopts-list subqueue 
-anchor-rule = "anchor" string [ ( "in" | "out" ) ] [ "on" ifspec ] - [ af ] [ "proto" ] [ protospec ] [ hosts ] +anchor-rule = "anchor" [ string ] [ ( "in" | "out" ) ] [ "on" ifspec ] + [ af ] [ protospec ] [ hosts ] [ "{" ] + +anchor-close = "}" trans-anchors = ( "nat-anchor" | "rdr-anchor" | "binat-anchor" ) string [ "on" ifspec ] [ af ] [ "proto" ] [ protospec ] [ hosts ] @@ -2693,15 +2837,14 @@ bandwidth-spec = "number" ( "b" | "Kb" | "Mb" | "Gb" | "%" ) action = "pass" | "block" [ return ] | [ "no" ] "scrub" return = "drop" | "return" | "return-rst" [ "( ttl" number ")" ] | - "return-icmp" [ "(" icmpcode ["," icmp6code ] ")" ] | + "return-icmp" [ "(" icmpcode [ [ "," ] icmp6code ] ")" ] | "return-icmp6" [ "(" icmp6code ")" ] icmpcode = ( icmp-code-name | icmp-code-number ) icmp6code = ( icmp6-code-name | icmp6-code-number ) ifspec = ( [ "!" ] interface-name ) | "{" interface-list "}" interface-list = [ "!" ] interface-name [ [ "," ] interface-list ] -route = "fastroute" | - ( "route-to" | "reply-to" | "dup-to" ) +route = ( "route-to" | "reply-to" | "dup-to" ) ( routehost | "{" routehost-list "}" ) [ pooltype ] af = "inet" | "inet6" @@ -2711,15 +2854,15 @@ protospec = "proto" ( proto-name | proto-number | proto-list = ( proto-name | proto-number ) [ [ "," ] proto-list ] hosts = "all" | - "from" ( "any" | "no-route" | "self" | host | + "from" ( "any" | "no-route" | "urpf-failed" | "self" | host | "{" host-list "}" | "route" string ) [ port ] [ os ] "to" ( "any" | "no-route" | "self" | host | "{" host-list "}" | "route" string ) [ port ] ipspec = "any" | host | "{" host-list "}" -host = [ "!" ] ( address [ "/" mask-bits ] | "<" string ">" ) +host = [ "!" 
] ( address [ "/" mask-bits ] | "\*(Lt" string "\*(Gt" ) redirhost = address [ "/" mask-bits ] -routehost = ( interface-name [ address [ "/" mask-bits ] ] ) +routehost = "(" interface-name [ address [ "/" mask-bits ] ] ")" address = ( interface-name | "(" interface-name ")" | hostname | ipv4-dotted-quad | ipv6-coloned-hex ) host-list = host [ [ "," ] host-list ] @@ -2732,15 +2875,15 @@ os = "os" ( os-name | "{" os-list "}" ) user = "user" ( unary-op | binary-op | "{" op-list "}" ) group = "group" ( unary-op | binary-op | "{" op-list "}" ) -unary-op = [ "=" | "!=" | "<" | "<=" | ">" | ">=" ] +unary-op = [ "=" | "!=" | "\*(Lt" | "\*(Le" | "\*(Gt" | "\*(Ge" ] ( name | number ) -binary-op = number ( "<>" | "><" | ":" ) number +binary-op = number ( "\*(Lt\*(Gt" | "\*(Gt\*(Lt" | ":" ) number op-list = ( unary-op | binary-op ) [ [ "," ] op-list ] os-name = operating-system-name os-list = os-name [ [ "," ] os-list ] -flags = "flags" [ flag-set ] "/" flag-set +flags = "flags" ( [ flag-set ] "/" flag-set | "any" ) flag-set = [ "F" ] [ "S" ] [ "R" ] [ "P" ] [ "A" ] [ "U" ] [ "E" ] [ "W" ] @@ -2759,8 +2902,8 @@ state-opt = ( "max" number | "no-sync" | timeout | "max-src-nodes" number | "max-src-states" number | "max-src-conn" number | "max-src-conn-rate" number "/" number | - "overload" "<" string ">" [ "flush" ] | - "if-bound" | "group-bound" | "floating" ) + "overload" "\*(Lt" string "\*(Gt" [ "flush" ] | + "if-bound" | "floating" ) fragmentation = [ "fragment reassemble" | "fragment crop" | "fragment drop-ovl" ] @@ -2812,6 +2955,7 @@ Service name database. Example rulesets. 
.El .Sh SEE ALSO +.Xr carp 4 , .Xr icmp 4 , .Xr icmp6 4 , .Xr ip 4 , diff --git a/contrib/pf/man/pf.os.5 b/contrib/pf/man/pf.os.5 index f4bdeda..69e8344 100644 --- a/contrib/pf/man/pf.os.5 +++ b/contrib/pf/man/pf.os.5 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pf.os.5,v 1.6 2004/03/31 11:13:03 dhartmei Exp $ +.\" $OpenBSD: pf.os.5,v 1.7 2005/11/16 20:07:18 stevesk Exp $ .\" .\" Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org> .\" @@ -204,37 +204,15 @@ The output of .Bd -literal # tcpdump -s128 -c1 -nv 'tcp[13] == 2' - 03:13:48.118526 10.0.0.1.3377 > 10.0.0.0.2: S [tcp sum ok] \e + 03:13:48.118526 10.0.0.1.3377 > 10.0.0.2.80: S [tcp sum ok] \e 534596083:534596083(0) win 57344 <mss 1460> (DF) [tos 0x10] \e - (ttl 64, id 11315) + (ttl 64, id 11315, len 44) .Ed .Pp almost translates into the following fingerprint .Bd -literal 57344:64:1:44:M1460: exampleOS:1.0::exampleOS 1.0 .Ed -.Pp -.Xr tcpdump 8 -does not explicitly give the packet length. -But it can usually be derived by adding the size of the IPv4 header to -the size of the TCP header to the size of the TCP options. -The size of both headers is typically twenty each and the usual -sizes of the TCP options are: -.Pp -.Bl -tag -width timestamp -offset indent -compact -.It mss -four bytes. -.It nop -1 byte. -.It sackOK -two bytes. -.It timestamp -ten bytes. -.It wscale -three bytes. -.El -.Pp -In the above example, the packet size comes out to 44 bytes. .Sh SEE ALSO .Xr pf 4 , .Xr pf.conf 5 , diff --git a/contrib/pf/man/pflog.4 b/contrib/pf/man/pflog.4 index d7bee13..2b2e22b 100644 --- a/contrib/pf/man/pflog.4 +++ b/contrib/pf/man/pflog.4 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pflog.4,v 1.7 2004/03/21 19:47:59 miod Exp $ +.\" $OpenBSD: pflog.4,v 1.9 2006/10/25 12:51:31 jmc Exp $ .\" .\" Copyright (c) 2001 Tobias Weingartner .\" All rights reserved. @@ -45,6 +45,14 @@ on the interface, or stored to disk using .Xr pflogd 8 . 
.Pp +The pflog0 interface is created automatically at boot if both +.Xr pf 4 +and +.Xr pflogd 8 +are enabled; +further instances can be created using +.Xr ifconfig 8 . +.Pp Each packet retrieved on this interface has a header associated with it of length .Dv PFLOG_HDRLEN . @@ -63,14 +71,22 @@ struct pfloghdr { char ruleset[PF_RULESET_NAME_SIZE]; u_int32_t rulenr; u_int32_t subrulenr; + uid_t uid; + pid_t pid; + uid_t rule_uid; + pid_t rule_pid; u_int8_t dir; u_int8_t pad[3]; }; .Ed .Sh EXAMPLES +Create a +.Nm +interface +and monitor all packets logged on it: .Bd -literal -offset indent -# ifconfig pflog0 up -# tcpdump -n -e -ttt -i pflog0 +# ifconfig pflog1 up +# tcpdump -n -e -ttt -i pflog1 .Ed .Sh SEE ALSO .Xr inet 4 , diff --git a/contrib/pf/man/pfsync.4 b/contrib/pf/man/pfsync.4 index 4c3c698..43f13b2 100644 --- a/contrib/pf/man/pfsync.4 +++ b/contrib/pf/man/pfsync.4 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pfsync.4,v 1.22 2005/02/24 15:53:17 jmc Exp $ +.\" $OpenBSD: pfsync.4,v 1.24 2006/10/23 07:05:49 jmc Exp $ .\" .\" Copyright (c) 2002 Michael Shalayeff .\" Copyright (c) 2003-2004 Ryan McBride @@ -200,7 +200,7 @@ The following should be added to the top of .Pa /etc/pf.conf : .Bd -literal -offset indent pass quick on { sis2 } proto pfsync -pass on { sis0 sis1 } proto carp keep state +pass on { sis0 sis1 } proto carp .Ed .Pp If it is preferable that one firewall handle the traffic, @@ -236,6 +236,7 @@ net.inet.carp.preempt=1 .Xr pf.conf 5 , .Xr protocols 5 , .Xr ifconfig 8 , +.Xr ifstated 8 , .Xr tcpdump 8 .Sh HISTORY The diff --git a/contrib/pf/pfctl/parse.y b/contrib/pf/pfctl/parse.y index 179898f..ef5d77b 100644 --- a/contrib/pf/pfctl/parse.y +++ b/contrib/pf/pfctl/parse.y @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.y,v 1.482 2005/03/07 13:20:03 henning Exp $ */ +/* $OpenBSD: parse.y,v 1.517 2007/02/03 23:26:40 dhartmei Exp $ */ /* * Copyright (c) 2001 Markus Friedl. All rights reserved. 
@@ -199,10 +199,12 @@ struct filter_opts { char *tag; char *match_tag; u_int8_t match_tag_not; + int rtableid; } filter_opts; struct antispoof_opts { char *label; + int rtableid; } antispoof_opts; struct scrub_opts { @@ -216,6 +218,7 @@ struct scrub_opts { int fragcache; int randomid; int reassemble_tcp; + int rtableid; } scrub_opts; struct queue_opts { @@ -254,9 +257,10 @@ struct node_hfsc_opts hfsc_opts; int yyerror(const char *, ...); int disallow_table(struct node_host *, const char *); +int disallow_urpf_failed(struct node_host *, const char *); int disallow_alias(struct node_host *, const char *); -int rule_consistent(struct pf_rule *); -int filter_consistent(struct pf_rule *); +int rule_consistent(struct pf_rule *, int); +int filter_consistent(struct pf_rule *, int); int nat_consistent(struct pf_rule *); int rdr_consistent(struct pf_rule *); int process_tabledef(char *, struct table_opts *); @@ -306,6 +310,7 @@ struct sym { int symset(const char *, const char *, int); char *symget(const char *); +void mv_rules(struct pf_ruleset *, struct pf_ruleset *); void decide_address_family(struct node_host *, sa_family_t *); void remove_invalid_hosts(struct node_host **, sa_family_t *); int invalid_redirect(struct node_host *, sa_family_t); @@ -325,6 +330,7 @@ typedef struct { u_int32_t number; int i; char *string; + int rtableid; struct { u_int8_t b1; u_int8_t b2; @@ -367,6 +373,7 @@ typedef struct { } keep_state; struct { u_int8_t log; + u_int8_t logif; u_int8_t quick; } logquick; struct { @@ -395,30 +402,30 @@ typedef struct { %} -%token PASS BLOCK SCRUB RETURN IN OS OUT LOG LOGALL QUICK ON FROM TO FLAGS +%token PASS BLOCK SCRUB RETURN IN OS OUT LOG QUICK ON FROM TO FLAGS %token RETURNRST RETURNICMP RETURNICMP6 PROTO INET INET6 ALL ANY ICMPTYPE %token ICMP6TYPE CODE KEEP MODULATE STATE PORT RDR NAT BINAT ARROW NODF %token MINTTL ERROR ALLOWOPTS FASTROUTE FILENAME ROUTETO DUPTO REPLYTO NO LABEL -%token NOROUTE FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE 
+%token NOROUTE URPFFAILED FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE %token REASSEMBLE FRAGDROP FRAGCROP ANCHOR NATANCHOR RDRANCHOR BINATANCHOR %token SET OPTIMIZATION TIMEOUT LIMIT LOGINTERFACE BLOCKPOLICY RANDOMID %token REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID %token ANTISPOOF FOR %token BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY %token ALTQ CBQ PRIQ HFSC BANDWIDTH TBRSIZE LINKSHARE REALTIME UPPERLIMIT -%token QUEUE PRIORITY QLIMIT -%token LOAD +%token QUEUE PRIORITY QLIMIT RTABLE +%token LOAD RULESET_OPTIMIZATION %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE %token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH -%token TAGGED TAG IFBOUND GRBOUND FLOATING STATEPOLICY ROUTE +%token TAGGED TAG IFBOUND FLOATING STATEPOLICY ROUTE %token <v.string> STRING %token <v.i> PORTBINARY %type <v.interface> interface if_list if_item_not if_item %type <v.number> number icmptype icmp6type uid gid -%type <v.number> tos not yesno natpass -%type <v.i> no dir log af fragcache sourcetrack flush -%type <v.i> unaryop statelock -%type <v.b> action nataction scrubaction +%type <v.number> tos not yesno +%type <v.i> no dir af fragcache optimizer +%type <v.i> sourcetrack flush unaryop statelock +%type <v.b> action nataction natpass scrubaction %type <v.b> flags flag blockspec %type <v.range> port rport %type <v.hashkey> hashkey @@ -437,10 +444,10 @@ typedef struct { %type <v.gid> gids gid_list gid_item %type <v.route> route %type <v.redirection> redirection redirpool -%type <v.string> label string tag +%type <v.string> label string tag anchorname %type <v.keep_state> keep %type <v.state_opt> state_opt_spec state_opt_list state_opt_item -%type <v.logquick> logquick +%type <v.logquick> logquick quick log logopts logopt %type <v.interface> antispoof_ifspc antispoof_iflst antispoof_if %type <v.qassign> qname %type <v.queue> qassign qassign_list qassign_item @@ -456,6 +463,7 @@ typedef struct { %type <v.table_opts> table_opts 
table_opt table_opts_l %type <v.pool_opts> pool_opts pool_opt pool_opts_l %type <v.tagged> tagged +%type <v.rtableid> rtable %% ruleset : /* empty */ @@ -472,9 +480,36 @@ ruleset : /* empty */ | ruleset varset '\n' | ruleset antispoof '\n' | ruleset tabledef '\n' + | '{' fakeanchor '}' '\n'; | ruleset error '\n' { errors++; } ; +/* + * apply to previously specified rule: must be careful to note + * what that is: pf or nat or binat or rdr + */ +fakeanchor : fakeanchor '\n' + | fakeanchor anchorrule '\n' + | fakeanchor binatrule '\n' + | fakeanchor natrule '\n' + | fakeanchor pfrule '\n' + | fakeanchor error '\n' + ; + +optimizer : string { + if (!strcmp($1, "none")) + $$ = 0; + else if (!strcmp($1, "basic")) + $$ = PF_OPTIMIZE_BASIC; + else if (!strcmp($1, "profile")) + $$ = PF_OPTIMIZE_BASIC | PF_OPTIMIZE_PROFILE; + else { + yyerror("unknown ruleset-optimization %s", $$); + YYERROR; + } + } + ; + option : SET OPTIMIZATION STRING { if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); @@ -485,7 +520,13 @@ option : SET OPTIMIZATION STRING { free($3); YYERROR; } - free ($3); + free($3); + } + | SET RULESET_OPTIMIZATION optimizer { + if (!(pf->opts & PF_OPT_OPTIMIZE)) { + pf->opts |= PF_OPT_OPTIMIZE; + pf->optimize = $3; + } } | SET TIMEOUT timeout_spec | SET TIMEOUT '{' timeout_list '}' @@ -535,12 +576,12 @@ option : SET OPTIMIZATION STRING { } | SET FINGERPRINTS STRING { if (pf->opts & PF_OPT_VERBOSE) - printf("set fingerprints %s\n", $3); + printf("set fingerprints \"%s\"\n", $3); if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); YYERROR; } - if (!pf->anchor[0]) { + if (!pf->anchor->name[0]) { if (pfctl_file_fingerprints(pf->dev, pf->opts, $3)) { yyerror("error loading " @@ -560,10 +601,6 @@ option : SET OPTIMIZATION STRING { case PFRULE_IFBOUND: printf("set state-policy if-bound\n"); break; - case PFRULE_GRBOUND: - printf("set state-policy " - "group-bound\n"); - break; } default_statelock = $3; } @@ -606,37 +643,120 @@ varset : STRING '=' string { } ; 
-anchorrule : ANCHOR string dir interface af proto fromto filter_opts { +anchorname : STRING { $$ = $1; } + | /* empty */ { $$ = NULL; } + ; + +optnl : optnl '\n' + | + ; + +pfa_anchorlist : pfrule optnl + | anchorrule optnl + | pfa_anchorlist pfrule optnl + | pfa_anchorlist anchorrule optnl + ; + +pfa_anchor : '{' + { + char ta[PF_ANCHOR_NAME_SIZE]; + struct pf_ruleset *rs; + + /* stepping into a brace anchor */ + pf->asd++; + pf->bn++; + pf->brace = 1; + + /* create a holding ruleset in the root */ + snprintf(ta, PF_ANCHOR_NAME_SIZE, "_%d", pf->bn); + rs = pf_find_or_create_ruleset(ta); + if (rs == NULL) + err(1, "pfa_anchor: pf_find_or_create_ruleset"); + pf->astack[pf->asd] = rs->anchor; + pf->anchor = rs->anchor; + } '\n' pfa_anchorlist '}' + { + pf->alast = pf->anchor; + pf->asd--; + pf->anchor = pf->astack[pf->asd]; + } + | /* empty */ + ; + +anchorrule : ANCHOR anchorname dir quick interface af proto fromto + filter_opts pfa_anchor + { struct pf_rule r; if (check_rulestate(PFCTL_STATE_FILTER)) { + if ($2) + free($2); + YYERROR; + } + + if ($2 && ($2[0] == '_' || strstr($2, "/_") != NULL)) { free($2); + yyerror("anchor names beginning with '_' " + "are reserved for internal use"); YYERROR; } memset(&r, 0, sizeof(r)); + if (pf->astack[pf->asd + 1]) { + /* move inline rules into relative location */ + pf_anchor_setup(&r, + &pf->astack[pf->asd]->ruleset, + $2 ? 
$2 : pf->alast->name); + + if (r.anchor == NULL) + err(1, "anchorrule: unable to " + "create ruleset"); + + if (pf->alast != r.anchor) { + if (r.anchor->match) { + yyerror("inline anchor '%s' " + "already exists", + r.anchor->name); + YYERROR; + } + mv_rules(&pf->alast->ruleset, + &r.anchor->ruleset); + } + pf_remove_if_empty_ruleset(&pf->alast->ruleset); + pf->alast = r.anchor; + } else { + if (!$2) { + yyerror("anchors without explicit " + "rules must specify a name"); + YYERROR; + } + } r.direction = $3; - r.af = $5; - r.prob = $8.prob; + r.quick = $4.quick; + r.af = $6; + r.prob = $9.prob; + r.rtableid = $9.rtableid; - if ($8.match_tag) - if (strlcpy(r.match_tagname, $8.match_tag, + if ($9.match_tag) + if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } - r.match_tag_not = $8.match_tag_not; + r.match_tag_not = $9.match_tag_not; - decide_address_family($7.src.host, &r.af); - decide_address_family($7.dst.host, &r.af); + decide_address_family($8.src.host, &r.af); + decide_address_family($8.dst.host, &r.af); - expand_rule(&r, $4, NULL, $6, $7.src_os, - $7.src.host, $7.src.port, $7.dst.host, $7.dst.port, - 0, 0, 0, $2); + expand_rule(&r, $5, NULL, $7, $8.src_os, + $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, + 0, 0, 0, pf->astack[pf->asd + 1] ? 
+ pf->alast->name : $2); free($2); + pf->astack[pf->asd + 1] = NULL; } - | NATANCHOR string interface af proto fromto { + | NATANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { @@ -647,6 +767,7 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { memset(&r, 0, sizeof(r)); r.action = PF_NAT; r.af = $4; + r.rtableid = $7; decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); @@ -656,7 +777,7 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { 0, 0, 0, $2); free($2); } - | RDRANCHOR string interface af proto fromto { + | RDRANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { @@ -667,6 +788,7 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { memset(&r, 0, sizeof(r)); r.action = PF_RDR; r.af = $4; + r.rtableid = $7; decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); @@ -697,7 +819,7 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { 0, 0, 0, $2); free($2); } - | BINATANCHOR string interface af proto fromto { + | BINATANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { @@ -708,6 +830,7 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { memset(&r, 0, sizeof(r)); r.action = PF_BINAT; r.af = $4; + r.rtableid = $7; if ($5 != NULL) { if ($5->next != NULL) { yyerror("proto list expansion" @@ -736,7 +859,8 @@ anchorrule : ANCHOR string dir interface af proto fromto filter_opts { loadrule : LOAD ANCHOR string FROM string { struct loadanchors *loadanchor; - if (strlen($3) >= MAXPATHLEN) { + if (strlen(pf->anchor->name) + 1 + + strlen($3) >= MAXPATHLEN) { yyerror("anchorname %s too long, max %u\n", $3, MAXPATHLEN - 1); free($3); @@ -745,8 +869,14 @@ loadrule : LOAD ANCHOR string FROM string { loadanchor = calloc(1, sizeof(struct 
loadanchors)); if (loadanchor == NULL) err(1, "loadrule: calloc"); - if ((loadanchor->anchorname = strdup($3)) == NULL) - err(1, "loadrule: strdup"); + if ((loadanchor->anchorname = malloc(MAXPATHLEN)) == + NULL) + err(1, "loadrule: malloc"); + if (pf->anchor->name[0]) + snprintf(loadanchor->anchorname, MAXPATHLEN, + "%s/%s", pf->anchor->name, $3); + else + strlcpy(loadanchor->anchorname, $3, MAXPATHLEN); if ((loadanchor->filename = strdup($5)) == NULL) err(1, "loadrule: strdup"); @@ -779,6 +909,7 @@ scrubrule : scrubaction dir logquick interface af proto fromto scrub_opts r.direction = $2; r.log = $3.log; + r.logif = $3.logif; if ($3.quick) { yyerror("scrub rules do not support 'quick'"); YYERROR; @@ -803,6 +934,7 @@ scrubrule : scrubaction dir logquick interface af proto fromto scrub_opts r.max_mss = $8.maxmss; if ($8.fragcache) r.rule_flag |= $8.fragcache; + r.rtableid = $8.rtableid; expand_rule(&r, $4, NULL, $6, $7.src_os, $7.src.host, $7.src.port, $7.dst.host, $7.dst.port, @@ -811,12 +943,14 @@ scrubrule : scrubaction dir logquick interface af proto fromto scrub_opts ; scrub_opts : { - bzero(&scrub_opts, sizeof scrub_opts); - } + bzero(&scrub_opts, sizeof scrub_opts); + scrub_opts.rtableid = -1; + } scrub_opts_l { $$ = scrub_opts; } | /* empty */ { bzero(&scrub_opts, sizeof scrub_opts); + scrub_opts.rtableid = -1; $$ = scrub_opts; } ; @@ -885,6 +1019,13 @@ scrub_opt : NODF { } scrub_opts.randomid = 1; } + | RTABLE number { + if ($2 > RT_TABLEID_MAX || $2 < 0) { + yyerror("invalid rtable id"); + YYERROR; + } + scrub_opts.rtableid = $2; + } ; fragcache : FRAGMENT REASSEMBLE { $$ = 0; /* default */ } @@ -906,10 +1047,12 @@ antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { r.action = PF_DROP; r.direction = PF_IN; r.log = $2.log; + r.logif = $2.logif; r.quick = $2.quick; r.af = $4; if (rule_label(&r, $5.label)) YYERROR; + r.rtableid = $5.rtableid; j = calloc(1, sizeof(struct node_if)); if (j == NULL) err(1, "antispoof: calloc"); @@ -960,6 +1103,7 
@@ antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { r.af = $4; if (rule_label(&r, $5.label)) YYERROR; + r.rtableid = $5.rtableid; if (hh != NULL) h = hh; else @@ -994,11 +1138,15 @@ antispoof_if : if_item { $$ = $1; } } ; -antispoof_opts : { bzero(&antispoof_opts, sizeof antispoof_opts); } +antispoof_opts : { + bzero(&antispoof_opts, sizeof antispoof_opts); + antispoof_opts.rtableid = -1; + } antispoof_opts_l { $$ = antispoof_opts; } | /* empty */ { bzero(&antispoof_opts, sizeof antispoof_opts); + antispoof_opts.rtableid = -1; $$ = antispoof_opts; } ; @@ -1014,6 +1162,13 @@ antispoof_opt : label { } antispoof_opts.label = $1; } + | RTABLE number { + if ($2 > RT_TABLEID_MAX || $2 < 0) { + yyerror("invalid rtable id"); + YYERROR; + } + antispoof_opts.rtableid = $2; + } ; not : '!' { $$ = 1; } @@ -1100,6 +1255,10 @@ table_opt : STRING { yyerror("\"no-route\" is not permitted " "inside tables"); break; + case PF_ADDR_URPFFAILED: + yyerror("\"urpf-failed\" is not " + "permitted inside tables"); + break; default: yyerror("unknown address type %d", n->addr.type); @@ -1499,6 +1658,7 @@ pfrule : action dir logquick interface route af proto fromto struct node_proto *proto; int srctrack = 0; int statelock = 0; + int adaptive = 0; if (check_rulestate(PFCTL_STATE_FILTER)) YYERROR; @@ -1524,8 +1684,10 @@ pfrule : action dir logquick interface route af proto fromto } r.direction = $2; r.log = $3.log; + r.logif = $3.logif; r.quick = $3.quick; r.prob = $9.prob; + r.rtableid = $9.rtableid; r.af = $6; if ($9.tag) @@ -1543,11 +1705,15 @@ pfrule : action dir logquick interface route af proto fromto YYERROR; } r.match_tag_not = $9.match_tag_not; - r.flags = $9.flags.b1; - r.flagset = $9.flags.b2; if (rule_label(&r, $9.label)) YYERROR; free($9.label); + r.flags = $9.flags.b1; + r.flagset = $9.flags.b2; + if (($9.flags.b1 & $9.flags.b2) != $9.flags.b1) { + yyerror("flags always false"); + YYERROR; + } if ($9.flags.b1 || $9.flags.b2 || $8.src_os) { for (proto = $7; proto 
!= NULL && proto->proto != IPPROTO_TCP; @@ -1575,6 +1741,12 @@ pfrule : action dir logquick interface route af proto fromto r.tos = $9.tos; r.keep_state = $9.keep.action; + + /* 'keep state' by default on pass rules. */ + if (!r.keep_state && !r.action && + !($9.marker & FOM_KEEP)) + r.keep_state = PF_STATE_NORMAL; + o = $9.keep.options; while (o) { struct node_state_opt *p = o; @@ -1671,8 +1843,8 @@ pfrule : action dir logquick interface route af proto fromto if (o->data.max_src_conn_rate.limit > PF_THRESHOLD_MAX) { yyerror("'max-src-conn-rate' " - "maximum rate must be < %u", - PF_THRESHOLD_MAX); + "maximum rate must be < %u", + PF_THRESHOLD_MAX); YYERROR; } r.max_src_conn_rate.limit = @@ -1709,6 +1881,11 @@ pfrule : action dir logquick interface route af proto fromto r.rule_flag |= o->data.statelock; break; case PF_STATE_OPT_TIMEOUT: + if (o->data.timeout.number == + PFTM_ADAPTIVE_START || + o->data.timeout.number == + PFTM_ADAPTIVE_END) + adaptive = 1; if (r.timeout[o->data.timeout.number]) { yyerror("state timeout %s " "multiple definitions", @@ -1722,6 +1899,20 @@ pfrule : action dir logquick interface route af proto fromto o = o->next; free(p); } + + /* 'flags S/SA' by default on stateful rules */ + if (!r.action && !r.flags && !r.flagset && + !$9.fragment && !($9.marker & FOM_FLAGS) && + r.keep_state) { + r.flags = parse_flags("S"); + r.flagset = parse_flags("SA"); + } + if (!adaptive && r.max_states) { + r.timeout[PFTM_ADAPTIVE_START] = + (r.max_states / 10) * 6; + r.timeout[PFTM_ADAPTIVE_END] = + (r.max_states / 10) * 12; + } if (r.rule_flag & PFRULE_SRCTRACK) { if (srctrack == PF_SRCTRACK_GLOBAL && r.max_src_nodes) { @@ -1832,11 +2023,15 @@ pfrule : action dir logquick interface route af proto fromto } ; -filter_opts : { bzero(&filter_opts, sizeof filter_opts); } +filter_opts : { + bzero(&filter_opts, sizeof filter_opts); + filter_opts.rtableid = -1; + } filter_opts_l { $$ = filter_opts; } | /* empty */ { bzero(&filter_opts, sizeof filter_opts); + 
filter_opts.rtableid = -1; $$ = filter_opts; } ; @@ -1940,6 +2135,13 @@ filter_opt : USER uids { filter_opts.prob = (u_int32_t)p; free($2); } + | RTABLE number { + if ($2 > RT_TABLEID_MAX || $2 < 0) { + yyerror("invalid rtable id"); + YYERROR; + } + filter_opts.rtableid = $2; + } ; action : PASS { $$.b1 = PF_PASS; $$.b2 = $$.w = 0; } @@ -2021,15 +2223,55 @@ dir : /* empty */ { $$ = 0; } | OUT { $$ = PF_OUT; } ; -logquick : /* empty */ { $$.log = 0; $$.quick = 0; } - | log { $$.log = $1; $$.quick = 0; } - | QUICK { $$.log = 0; $$.quick = 1; } - | log QUICK { $$.log = $1; $$.quick = 1; } - | QUICK log { $$.log = $2; $$.quick = 1; } +quick : /* empty */ { $$.quick = 0; } + | QUICK { $$.quick = 1; } + ; + +logquick : /* empty */ { $$.log = 0; $$.quick = 0; $$.logif = 0; } + | log { $$ = $1; $$.quick = 0; } + | QUICK { $$.quick = 1; $$.log = 0; $$.logif = 0; } + | log QUICK { $$ = $1; $$.quick = 1; } + | QUICK log { $$ = $2; $$.quick = 1; } + ; + +log : LOG { $$.log = PF_LOG; $$.logif = 0; } + | LOG '(' logopts ')' { + $$.log = PF_LOG | $3.log; + $$.logif = $3.logif; + } ; -log : LOG { $$ = 1; } - | LOGALL { $$ = 2; } +logopts : logopt { $$ = $1; } + | logopts comma logopt { + $$.log = $1.log | $3.log; + $$.logif = $3.logif; + if ($$.logif == 0) + $$.logif = $1.logif; + } + ; + +logopt : ALL { $$.log = PF_LOG_ALL; $$.logif = 0; } + | USER { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; } + | GROUP { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; } + | TO string { + const char *errstr; + u_int i; + + $$.log = 0; + if (strncmp($2, "pflog", 5)) { + yyerror("%s: should be a pflog interface", $2); + free($2); + YYERROR; + } + i = strtonum($2 + 5, 0, 255, &errstr); + if (errstr) { + yyerror("%s: %s", $2, errstr); + free($2); + YYERROR; + } + free($2); + $$.logif = i; + } ; interface : /* empty */ { $$ = NULL; } @@ -2062,7 +2304,7 @@ if_item : STRING { YYERROR; } - if ((n = ifa_exists($1, 1)) != NULL) + if ((n = ifa_exists($1)) != NULL) $$->ifa_flags = n->ifa_flags; free($1); @@ 
-2176,6 +2418,9 @@ to : /* empty */ { $$.port = NULL; } | TO ipportspec { + if (disallow_urpf_failed($2.host, "\"urpf-failed\" is " + "not permitted in a destination address")) + YYERROR; $$ = $2; } ; @@ -2199,8 +2444,8 @@ ipspec : ANY { $$ = NULL; } | '{' host_list '}' { $$ = $2; } ; -host_list : xhost { $$ = $1; } - | host_list comma xhost { +host_list : ipspec { $$ = $1; } + | host_list comma ipspec { if ($3 == NULL) $$ = $1; else if ($1 == NULL) @@ -2220,12 +2465,22 @@ xhost : not host { n->not = $1; $$ = $2; } - | NOROUTE { + | not NOROUTE { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "xhost: calloc"); $$->addr.type = PF_ADDR_NOROUTE; $$->next = NULL; + $$->not = $1; + $$->tail = $$; + } + | not URPFFAILED { + $$ = calloc(1, sizeof(struct node_host)); + if ($$ == NULL) + err(1, "xhost: calloc"); + $$->addr.type = PF_ADDR_URPFFAILED; + $$->next = NULL; + $$->not = $1; $$->tail = $$; } ; @@ -2428,31 +2683,13 @@ port_item : port { port : STRING { char *p = strchr($1, ':'); - struct servent *s = NULL; - u_long ulval; if (p == NULL) { - if (atoul($1, &ulval) == 0) { - if (ulval > 65535) { - free($1); - yyerror("illegal port value %lu", - ulval); - YYERROR; - } - $$.a = htons(ulval); - } else { - s = getservbyname($1, "tcp"); - if (s == NULL) - s = getservbyname($1, "udp"); - if (s == NULL) { - yyerror("unknown port %s", $1); - free($1); - YYERROR; - } - $$.a = s->s_port; + if (($$.a = getservice($1)) == -1) { + free($1); + YYERROR; } - $$.b = 0; - $$.t = 0; + $$.b = $$.t = 0; } else { int port[2]; @@ -2649,6 +2886,7 @@ flag : STRING { flags : FLAGS flag '/' flag { $$.b1 = $2.b1; $$.b2 = $4.b1; } | FLAGS '/' flag { $$.b1 = 0; $$.b2 = $3.b1; } + | FLAGS ANY { $$.b1 = 0; $$.b2 = 0; } ; icmpspec : ICMPTYPE icmp_item { $$ = $2; } @@ -2786,7 +3024,8 @@ icmp6type : STRING { if (atoul($1, &ulval) == 0) { if (ulval > 255) { - yyerror("illegal icmp6-type %lu", ulval); + yyerror("illegal icmp6-type %lu", + ulval); free($1); YYERROR; } @@ -2832,15 +3071,16 
@@ sourcetrack : SOURCETRACK { $$ = PF_SRCTRACK; } statelock : IFBOUND { $$ = PFRULE_IFBOUND; } - | GRBOUND { - $$ = PFRULE_GRBOUND; - } | FLOATING { $$ = 0; } ; -keep : KEEP STATE state_opt_spec { +keep : NO STATE { + $$.action = 0; + $$.options = NULL; + } + | KEEP STATE state_opt_spec { $$.action = PF_STATE_NORMAL; $$.options = $3; } @@ -3199,29 +3439,41 @@ redirection : /* empty */ { $$ = NULL; } } ; -natpass : /* empty */ { $$ = 0; } - | PASS { $$ = 1; } +natpass : /* empty */ { $$.b1 = $$.b2 = 0; } + | PASS { $$.b1 = 1; $$.b2 = 0; } + | PASS log { $$.b1 = 1; $$.b2 = $2.log; $$.w2 = $2.logif; } ; nataction : no NAT natpass { - $$.b2 = $$.w = 0; + if ($1 && $3.b1) { + yyerror("\"pass\" not valid with \"no\""); + YYERROR; + } if ($1) $$.b1 = PF_NONAT; else $$.b1 = PF_NAT; - $$.b2 = $3; + $$.b2 = $3.b1; + $$.w = $3.b2; + $$.w2 = $3.w2; } | no RDR natpass { - $$.b2 = $$.w = 0; + if ($1 && $3.b1) { + yyerror("\"pass\" not valid with \"no\""); + YYERROR; + } if ($1) $$.b1 = PF_NORDR; else $$.b1 = PF_RDR; - $$.b2 = $3; + $$.b2 = $3.b1; + $$.w = $3.b2; + $$.w2 = $3.w2; } ; -natrule : nataction interface af proto fromto tag tagged redirpool pool_opts +natrule : nataction interface af proto fromto tag tagged rtable + redirpool pool_opts { struct pf_rule r; @@ -3232,6 +3484,8 @@ natrule : nataction interface af proto fromto tag tagged redirpool pool_opts r.action = $1.b1; r.natpass = $1.b2; + r.log = $1.w; + r.logif = $1.w2; r.af = $3; if (!r.af) { @@ -3259,47 +3513,48 @@ natrule : nataction interface af proto fromto tag tagged redirpool pool_opts YYERROR; } r.match_tag_not = $7.neg; + r.rtableid = $8; if (r.action == PF_NONAT || r.action == PF_NORDR) { - if ($8 != NULL) { + if ($9 != NULL) { yyerror("translation rule with 'no' " "does not need '->'"); YYERROR; } } else { - if ($8 == NULL || $8->host == NULL) { + if ($9 == NULL || $9->host == NULL) { yyerror("translation rule requires '-> " "address'"); YYERROR; } - if (!r.af && ! 
$8->host->ifindex) - r.af = $8->host->af; + if (!r.af && ! $9->host->ifindex) + r.af = $9->host->af; - remove_invalid_hosts(&$8->host, &r.af); - if (invalid_redirect($8->host, r.af)) + remove_invalid_hosts(&$9->host, &r.af); + if (invalid_redirect($9->host, r.af)) YYERROR; - if (check_netmask($8->host, r.af)) + if (check_netmask($9->host, r.af)) YYERROR; - r.rpool.proxy_port[0] = ntohs($8->rport.a); + r.rpool.proxy_port[0] = ntohs($9->rport.a); switch (r.action) { case PF_RDR: - if (!$8->rport.b && $8->rport.t && + if (!$9->rport.b && $9->rport.t && $5.dst.port != NULL) { r.rpool.proxy_port[1] = - ntohs($8->rport.a) + + ntohs($9->rport.a) + (ntohs( $5.dst.port->port[1]) - ntohs( $5.dst.port->port[0])); } else r.rpool.proxy_port[1] = - ntohs($8->rport.b); + ntohs($9->rport.b); break; case PF_NAT: r.rpool.proxy_port[1] = - ntohs($8->rport.b); + ntohs($9->rport.b); if (!r.rpool.proxy_port[0] && !r.rpool.proxy_port[1]) { r.rpool.proxy_port[0] = @@ -3314,25 +3569,25 @@ natrule : nataction interface af proto fromto tag tagged redirpool pool_opts break; } - r.rpool.opts = $9.type; + r.rpool.opts = $10.type; if ((r.rpool.opts & PF_POOL_TYPEMASK) == - PF_POOL_NONE && ($8->host->next != NULL || - $8->host->addr.type == PF_ADDR_TABLE || - DYNIF_MULTIADDR($8->host->addr))) + PF_POOL_NONE && ($9->host->next != NULL || + $9->host->addr.type == PF_ADDR_TABLE || + DYNIF_MULTIADDR($9->host->addr))) r.rpool.opts = PF_POOL_ROUNDROBIN; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && - disallow_table($8->host, "tables are only " + disallow_table($9->host, "tables are only " "supported in round-robin redirection " "pools")) YYERROR; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && - disallow_alias($8->host, "interface (%s) " + disallow_alias($9->host, "interface (%s) " "is only supported in round-robin " "redirection pools")) YYERROR; - if ($8->host->next != NULL) { + if ($9->host->next != NULL) { if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) 
{ yyerror("only round-robin " @@ -3343,14 +3598,14 @@ natrule : nataction interface af proto fromto tag tagged redirpool pool_opts } } - if ($9.key != NULL) - memcpy(&r.rpool.key, $9.key, + if ($10.key != NULL) + memcpy(&r.rpool.key, $10.key, sizeof(struct pf_poolhashkey)); - if ($9.opts) - r.rpool.opts |= $9.opts; + if ($10.opts) + r.rpool.opts |= $10.opts; - if ($9.staticport) { + if ($10.staticport) { if (r.action != PF_NAT) { yyerror("the 'static-port' option is " "only valid with nat rules"); @@ -3369,37 +3624,46 @@ natrule : nataction interface af proto fromto tag tagged redirpool pool_opts r.rpool.proxy_port[1] = 0; } - expand_rule(&r, $2, $8 == NULL ? NULL : $8->host, $4, + expand_rule(&r, $2, $9 == NULL ? NULL : $9->host, $4, $5.src_os, $5.src.host, $5.src.port, $5.dst.host, $5.dst.port, 0, 0, 0, ""); - free($8); + free($9); } ; -binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag tagged - redirection +binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag + tagged rtable redirection { struct pf_rule binat; struct pf_pooladdr *pa; if (check_rulestate(PFCTL_STATE_NAT)) YYERROR; + if (disallow_urpf_failed($10, "\"urpf-failed\" is not " + "permitted as a binat destination")) + YYERROR; memset(&binat, 0, sizeof(binat)); + if ($1 && $3.b1) { + yyerror("\"pass\" not valid with \"no\""); + YYERROR; + } if ($1) binat.action = PF_NOBINAT; else binat.action = PF_BINAT; - binat.natpass = $3; + binat.natpass = $3.b1; + binat.log = $3.b2; + binat.logif = $3.w2; binat.af = $5; if (!binat.af && $8 != NULL && $8->af) binat.af = $8->af; if (!binat.af && $10 != NULL && $10->af) binat.af = $10->af; - if (!binat.af && $13 != NULL && $13->host) - binat.af = $13->host->af; + if (!binat.af && $14 != NULL && $14->host) + binat.af = $14->host->af; if (!binat.af) { yyerror("address family (inet/inet6) " "undefined"); @@ -3428,6 +3692,7 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag tagged YYERROR; } 
binat.match_tag_not = $12.neg; + binat.rtableid = $13; if ($6 != NULL) { binat.proto = $6->proto; @@ -3441,12 +3706,12 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag tagged "interface (%s) as the source address of a binat " "rule")) YYERROR; - if ($13 != NULL && $13->host != NULL && disallow_table( - $13->host, "invalid use of table <%s> as the " + if ($14 != NULL && $14->host != NULL && disallow_table( + $14->host, "invalid use of table <%s> as the " "redirect address of a binat rule")) YYERROR; - if ($13 != NULL && $13->host != NULL && disallow_alias( - $13->host, "invalid use of interface (%s) as the " + if ($14 != NULL && $14->host != NULL && disallow_alias( + $14->host, "invalid use of interface (%s) as the " "redirect address of a binat rule")) YYERROR; @@ -3485,33 +3750,33 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag tagged } if (binat.action == PF_NOBINAT) { - if ($13 != NULL) { + if ($14 != NULL) { yyerror("'no binat' rule does not need" " '->'"); YYERROR; } } else { - if ($13 == NULL || $13->host == NULL) { + if ($14 == NULL || $14->host == NULL) { yyerror("'binat' rule requires" " '-> address'"); YYERROR; } - remove_invalid_hosts(&$13->host, &binat.af); - if (invalid_redirect($13->host, binat.af)) + remove_invalid_hosts(&$14->host, &binat.af); + if (invalid_redirect($14->host, binat.af)) YYERROR; - if ($13->host->next != NULL) { + if ($14->host->next != NULL) { yyerror("binat rule must redirect to " "a single address"); YYERROR; } - if (check_netmask($13->host, binat.af)) + if (check_netmask($14->host, binat.af)) YYERROR; if (!PF_AZERO(&binat.src.addr.v.a.mask, binat.af) && !PF_AEQ(&binat.src.addr.v.a.mask, - &$13->host->addr.v.a.mask, binat.af)) { + &$14->host->addr.v.a.mask, binat.af)) { yyerror("'binat' source mask and " "redirect mask must be the same"); YYERROR; @@ -3521,12 +3786,12 @@ binatrule : no BINAT natpass interface af proto FROM host TO ipspec tag tagged pa = calloc(1, sizeof(struct 
pf_pooladdr)); if (pa == NULL) err(1, "binat: calloc"); - pa->addr = $13->host->addr; + pa->addr = $14->host->addr; pa->ifname[0] = 0; TAILQ_INSERT_TAIL(&binat.rpool.list, pa, entries); - free($13); + free($14); } pfctl_add_rule(pf, &binat, ""); @@ -3541,6 +3806,16 @@ tagged : /* empty */ { $$.neg = 0; $$.name = NULL; } | not TAGGED string { $$.neg = $1; $$.name = $3; } ; +rtable : /* empty */ { $$ = -1; } + | RTABLE number { + if ($2 > RT_TABLEID_MAX || $2 < 0) { + yyerror("invalid rtable id"); + YYERROR; + } + $$ = $2; + } + ; + route_host : STRING { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) @@ -3701,6 +3976,17 @@ disallow_table(struct node_host *h, const char *fmt) } int +disallow_urpf_failed(struct node_host *h, const char *fmt) +{ + for (; h != NULL; h = h->next) + if (h->addr.type == PF_ADDR_URPFFAILED) { + yyerror(fmt); + return (1); + } + return (0); +} + +int disallow_alias(struct node_host *h, const char *fmt) { for (; h != NULL; h = h->next) @@ -3712,7 +3998,7 @@ disallow_alias(struct node_host *h, const char *fmt) } int -rule_consistent(struct pf_rule *r) +rule_consistent(struct pf_rule *r, int anchor_call) { int problems = 0; @@ -3721,7 +4007,7 @@ rule_consistent(struct pf_rule *r) case PF_DROP: case PF_SCRUB: case PF_NOSCRUB: - problems = filter_consistent(r); + problems = filter_consistent(r, anchor_call); break; case PF_NAT: case PF_NONAT: @@ -3740,7 +4026,7 @@ rule_consistent(struct pf_rule *r) } int -filter_consistent(struct pf_rule *r) +filter_consistent(struct pf_rule *r, int anchor_call) { int problems = 0; @@ -3792,11 +4078,6 @@ filter_consistent(struct pf_rule *r) yyerror("keep state on block rules doesn't make sense"); problems++; } - if ((r->tagname[0] || r->match_tagname[0]) && !r->keep_state && - r->action == PF_PASS) { - yyerror("tags cannot be used without keep state"); - problems++; - } return (-problems); } @@ -3864,7 +4145,7 @@ process_tabledef(char *name, struct table_opts *opts) &opts->init_nodes); if (!(pf->opts & 
PF_OPT_NOACTION) && pfctl_define_table(name, opts->flags, opts->init_addr, - pf->anchor, &ab, pf->tticket)) { + pf->anchor->name, &ab, pf->anchor->ruleset.tticket)) { yyerror("cannot define table %s: %s", name, pfr_strerror(errno)); goto _error; @@ -3963,6 +4244,9 @@ expand_label_addr(const char *name, char *label, size_t len, sa_family_t af, case PF_ADDR_NOROUTE: snprintf(tmp, sizeof(tmp), "no-route"); break; + case PF_ADDR_URPFFAILED: + snprintf(tmp, sizeof(tmp), "urpf-failed"); + break; case PF_ADDR_ADDRMASK: if (!af || (PF_AZERO(&h->addr.v.a.addr, af) && PF_AZERO(&h->addr.v.a.mask, af))) @@ -4053,7 +4337,7 @@ expand_label_nr(const char *name, char *label, size_t len) char n[11]; if (strstr(label, name) != NULL) { - snprintf(n, sizeof(n), "%u", pf->rule_nr); + snprintf(n, sizeof(n), "%u", pf->anchor->match); expand_label_str(label, len, name, n); } } @@ -4480,10 +4764,10 @@ expand_rule(struct pf_rule *r, TAILQ_INSERT_TAIL(&r->rpool.list, pa, entries); } - if (rule_consistent(r) < 0 || error) + if (rule_consistent(r, anchor_call[0]) < 0 || error) yyerror("skipping rule due to errors"); else { - r->nr = pf->rule_nr++; + r->nr = pf->astack[pf->asd]->match++; pfctl_add_rule(pf, r, anchor_call); added++; } @@ -4598,7 +4882,6 @@ lookup(char *s) { "from", FROM}, { "global", GLOBAL}, { "group", GROUP}, - { "group-bound", GRBOUND}, { "hfsc", HFSC}, { "hostid", HOSTID}, { "icmp-type", ICMPTYPE}, @@ -4613,7 +4896,6 @@ lookup(char *s) { "linkshare", LINKSHARE}, { "load", LOAD}, { "log", LOG}, - { "log-all", LOGALL}, { "loginterface", LOGINTERFACE}, { "max", MAXIMUM}, { "max-mss", MAXMSS}, @@ -4658,7 +4940,9 @@ lookup(char *s) { "round-robin", ROUNDROBIN}, { "route", ROUTE}, { "route-to", ROUTETO}, + { "rtable", RTABLE}, { "rule", RULE}, + { "ruleset-optimization", RULESET_OPTIMIZATION}, { "scrub", SCRUB}, { "set", SET}, { "skip", SKIP}, @@ -4678,6 +4962,7 @@ lookup(char *s) { "tos", TOS}, { "ttl", TTL}, { "upperlimit", UPPERLIMIT}, + { "urpf-failed", URPFFAILED}, { "user", 
USER}, }; const struct keywords *p; @@ -4725,9 +5010,7 @@ lgetc(FILE *f) while ((c = getc(f)) == '\\') { next = getc(f); if (next != '\n') { - if (isspace(next)) - yyerror("whitespace after \\"); - ungetc(next, f); + c = next; break; } yylval.lineno = lineno; @@ -5015,21 +5298,40 @@ symget(const char *nam) } void -decide_address_family(struct node_host *n, sa_family_t *af) +mv_rules(struct pf_ruleset *src, struct pf_ruleset *dst) { - sa_family_t target_af = 0; + int i; + struct pf_rule *r; + + for (i = 0; i < PF_RULESET_MAX; ++i) { + while ((r = TAILQ_FIRST(src->rules[i].active.ptr)) + != NULL) { + TAILQ_REMOVE(src->rules[i].active.ptr, r, entries); + TAILQ_INSERT_TAIL(dst->rules[i].active.ptr, r, entries); + dst->anchor->match++; + } + src->anchor->match = 0; + while ((r = TAILQ_FIRST(src->rules[i].inactive.ptr)) + != NULL) { + TAILQ_REMOVE(src->rules[i].inactive.ptr, r, entries); + TAILQ_INSERT_TAIL(dst->rules[i].inactive.ptr, + r, entries); + } + } +} - while (!*af && n != NULL) { - if (n->af) { - if (target_af == 0) - target_af = n->af; - if (target_af != n->af) - return; +void +decide_address_family(struct node_host *n, sa_family_t *af) +{ + if (*af != 0 || n == NULL) + return; + *af = n->af; + while ((n = n->next) != NULL) { + if (n->af != *af) { + *af = 0; + return; } - n = n->next; } - if (!*af && target_af) - *af = target_af; } void @@ -5170,19 +5472,23 @@ parseicmpspec(char *w, sa_family_t af) } int -pfctl_load_anchors(int dev, int opts, struct pfr_buffer *trans) +pfctl_load_anchors(int dev, struct pfctl *pf, struct pfr_buffer *trans) { struct loadanchors *la; + FILE *fin; TAILQ_FOREACH(la, &loadanchorshead, entries) { - if (opts & PF_OPT_VERBOSE) + if (pf->opts & PF_OPT_VERBOSE) fprintf(stderr, "\nLoading anchor %s from %s\n", la->anchorname, la->filename); - if (pfctl_rules(dev, la->filename, opts, la->anchorname, - trans) == -1) + if ((fin = pfctl_fopen(la->filename, "r")) == NULL) { + warn("%s", la->filename); + continue; + } + if (pfctl_rules(dev, 
la->filename, fin, pf->opts, pf->optimize, + la->anchorname, trans) == -1) return (-1); } return (0); } - diff --git a/contrib/pf/pfctl/pf_print_state.c b/contrib/pf/pfctl/pf_print_state.c index a41e9e5..e36b1fd 100644 --- a/contrib/pf/pfctl/pf_print_state.c +++ b/contrib/pf/pfctl/pf_print_state.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_print_state.c,v 1.40 2004/12/10 22:13:26 henning Exp $ */ +/* $OpenBSD: pf_print_state.c,v 1.44 2007/03/01 17:20:53 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -96,6 +96,9 @@ print_addr(struct pf_addr_wrap *addr, sa_family_t af, int verbose) case PF_ADDR_NOROUTE: printf("no-route"); return; + case PF_ADDR_URPFFAILED: + printf("urpf-failed"); + return; case PF_ADDR_RTLABEL: printf("route \"%s\"", addr->v.rtlabelname); return; @@ -274,7 +277,7 @@ print_state(struct pf_state *s, int opts) min = s->expire % 60; s->expire /= 60; printf(", expires in %.2u:%.2u:%.2u", s->expire, min, sec); - printf(", %u:%u pkts, %u:%u bytes", + printf(", %llu:%llu pkts, %llu:%llu bytes", s->packets[0], s->packets[1], s->bytes[0], s->bytes[1]); if (s->anchor.nr != -1) printf(", anchor %u", s->anchor.nr); @@ -287,8 +290,9 @@ print_state(struct pf_state *s, int opts) printf("\n"); } if (opts & PF_OPT_VERBOSE2) { - printf(" id: %016llx creatorid: %08x\n", - betoh64(s->id), ntohl(s->creatorid)); + printf(" id: %016llx creatorid: %08x%s\n", + betoh64(s->id), ntohl(s->creatorid), + ((s->sync_flags & PFSTATE_NOSYNC) ? " (no-sync)" : "")); } } diff --git a/contrib/pf/pfctl/pfctl.8 b/contrib/pf/pfctl/pfctl.8 index 9fdb00e..b5be8a1 100644 --- a/contrib/pf/pfctl/pfctl.8 +++ b/contrib/pf/pfctl/pfctl.8 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pfctl.8,v 1.118 2005/01/05 23:41:45 jmc Exp $ +.\" $OpenBSD: pfctl.8,v 1.128 2007/01/30 21:01:56 jmc Exp $ .\" .\" Copyright (c) 2001 Kjell Wooding. All rights reserved. 
.\" @@ -33,23 +33,23 @@ .Sh SYNOPSIS .Nm pfctl .Bk -words -.Op Fl AdeghmNnOoqRrvz +.Op Fl AdeghmNnOqRrvz .Op Fl a Ar anchor -.Xo -.Oo Fl D -.Ar macro Ns = Ns Ar value Oc -.Xc +.Oo Fl D Ar macro Ns = +.Ar value Oc .Op Fl F Ar modifier .Op Fl f Ar file .Op Fl i Ar interface -.Op Fl k Ar host +.Op Fl K Ar host | network +.Op Fl k Ar host | network +.Op Fl o Op Ar level .Op Fl p Ar device .Op Fl s Ar modifier -.Oo Xo +.Oo .Fl t Ar table .Fl T Ar command -.Op Ar address ... Oc -.Xc +.Op Ar address ... +.Oc .Op Fl x Ar level .Ek .Sh DESCRIPTION @@ -138,8 +138,10 @@ rules from the main ruleset is described in For example, the following will show all filter rules (see the .Fl s flag below) inside the anchor -.Li authpf/smith(1234) , -which would have been created for user smith by +.Dq authpf/smith(1234) , +which would have been created for user +.Dq smith +by .Xr authpf 8 , PID 1234: .Bd -literal -offset indent @@ -161,6 +163,27 @@ This is similar to C rules for variable scope. It is possible to create distinct tables with the same name in the global ruleset and in an anchor, but this is often bad design and a warning will be issued in that case. +.Pp +By default, recursive inline printing of anchors applies only to unnamed +anchors specified inline in the ruleset. +If the anchor name is terminated with a +.Sq * +character, the +.Fl s +flag will recursively print all anchors in a brace delimited block. +For example the following will print the +.Dq authpf +ruleset recursively: +.Bd -literal -offset indent +# pfctl -a 'authpf/*' -sr +.Ed +.Pp +To print the main ruleset recursively, specify only +.Sq * +as the anchor name: +.Bd -literal -offset indent +# pfctl -a '*' -sr +.Ed .It Fl D Ar macro Ns = Ns Ar value Define .Ar macro @@ -215,29 +238,49 @@ Help. .It Fl i Ar interface Restrict the operation to the given .Ar interface . -.It Fl k Ar host +.It Fl K Ar host | network +Kill all of the source tracking entries originating from the specified +.Ar host +or +.Ar network . 
+A second +.Fl K Ar host +or +.Fl K Ar network +option may be specified, which will kill all the source tracking +entries from the first host/network to the second. +.It Fl k Ar host | network Kill all of the state entries originating from the specified -.Ar host . +.Ar host +or +.Ar network . A second .Fl k Ar host +or +.Fl k Ar network option may be specified, which will kill all the state entries -from the first -.Ar host -to the second -.Ar host . +from the first host/network to the second. For example, to kill all of the state entries originating from -.Li host : -.Bd -literal -offset indent -# pfctl -k host -.Ed +.Dq host : +.Pp +.Dl # pfctl -k host .Pp To kill all of the state entries from -.Li host1 +.Dq host1 to -.Li host2 : -.Bd -literal -offset indent -# pfctl -k host1 -k host2 -.Ed +.Dq host2 : +.Pp +.Dl # pfctl -k host1 -k host2 +.Pp +To kill all states originating from 192.168.1.0/24 to 172.16.0.0/16: +.Pp +.Dl # pfctl -k 192.168.1.0/24 -k 172.16.0.0/16 +.Pp +A network prefix length of 0 can be used as a wildcard. +To kill all states with the target +.Dq host2 : +.Pp +.Dl # pfctl -k 0.0.0.0/0 -k host2 .It Fl m Merge in explicitly given options without resetting those which are omitted. @@ -253,11 +296,22 @@ Do not actually load rules, just parse them. .It Fl O Load only the options present in the rule file. Other rules and options are ignored. -.It Fl o -Enable the ruleset optimizer. +.It Fl o Op Ar level +Control the ruleset optimizer. The ruleset optimizer attempts to improve rulesets by removing rule duplication and making better use of rule ordering. -Specifically, it does four things: +.Pp +.Bl -tag -width xxxxxxxxxxxx -compact +.It Fl o Cm none +Disable the ruleset optimizer. +.It Fl o Cm basic +Enable basic ruleset optimizations. +.It Fl o Cm profile +Enable basic ruleset optimizations with profiling. 
+.El +.Pp +.Cm basic +optimization does four things: +.Pp .Bl -enum -compact .It @@ -270,10 +324,10 @@ combine multiple rules into a table when advantageous re-order the rules to improve evaluation performance .El .Pp -A second -.Fl o -may be specified to use the currently loaded ruleset as a feedback profile -to tailor the optimization of the +If +.Cm profile +is specified, the currently loaded ruleset will be examined as a feedback +profile to tailor the optimization of the .Ar quick rules to the actual network behavior. .Pp @@ -286,6 +340,14 @@ the ruleset optimizer should not be used or a .Ar label field should be added to all of the accounting rules to act as optimization barriers. +.Pp +To retain compatibility with previous behaviour, a single +.Fl o +without any options will enable +.Cm basic +optimizations, and a second +.Fl o +will enable profiling. .It Fl p Ar device Use the device file .Ar device @@ -350,7 +412,8 @@ When used together with .Fl v , source tracking statistics are also shown. .It Fl s Cm labels -Show per-rule statistics (label, evaluations, packets, bytes) of +Show per-rule statistics (label, evaluations, packets total, bytes total, +packets in, bytes in, packets out, bytes out) of filter rules with labels, useful for accounting. .It Fl s Cm timeouts Show the current global timeouts. @@ -362,8 +425,11 @@ Show the list of tables. Show the list of operating system fingerprints. .It Fl s Cm Interfaces Show the list of interfaces and interface drivers available to PF. -When used together with a double +When used together with .Fl v , +it additionally lists which interfaces have skip rules activated. +When used together with +.Fl vv , interface statistics are also shown. .Fl i can be used to select an interface or a group of interfaces. @@ -387,6 +453,13 @@ Add one or more addresses in a table. Automatically create a nonexisting table. .It Fl T Cm delete Delete one or more addresses from a table. 
+.It Fl T Cm expire Ar number +Delete addresses which had their statistics cleared more than +.Ar number +seconds ago. +For entries which have never had their statistics cleared, +.Ar number +refers to the time they were added to the table. .It Fl T Cm replace Replace the addresses of the table. Automatically create a nonexisting table. @@ -463,7 +536,7 @@ The following commands configure the firewall and send 10 pings to the FTP server: .Bd -literal -offset indent # printf "table <test> { ftp.openbsd.org }\en \e - pass out to <test> keep state\en" | pfctl -f- + pass out to <test>\en" | pfctl -f- # ping -qc10 ftp.openbsd.org .Ed .Pp diff --git a/contrib/pf/pfctl/pfctl.c b/contrib/pf/pfctl/pfctl.c index 2e628e5..cf338c7 100644 --- a/contrib/pf/pfctl/pfctl.c +++ b/contrib/pf/pfctl/pfctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl.c,v 1.234 2005/03/07 13:52:50 henning Exp $ */ +/* $OpenBSD: pfctl.c,v 1.262 2007/03/01 17:20:53 deraadt Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -41,6 +41,7 @@ #include <net/pfvar.h> #include <arpa/inet.h> #include <altq/altq.h> +#include <sys/sysctl.h> #include <err.h> #include <errno.h> @@ -65,6 +66,8 @@ int pfctl_clear_nat(int, int, char *); int pfctl_clear_altq(int, int); int pfctl_clear_src_nodes(int, int); int pfctl_clear_states(int, const char *, int); +void pfctl_addrprefix(char *, struct pf_addr *); +int pfctl_kill_src_nodes(int, const char *, int); int pfctl_kill_states(int, const char *, int); void pfctl_init_options(struct pfctl *); int pfctl_load_options(struct pfctl *); @@ -76,7 +79,7 @@ int pfctl_load_hostid(struct pfctl *, unsigned int); int pfctl_get_pool(int, struct pf_pool *, u_int32_t, u_int32_t, int, char *); void pfctl_print_rule_counters(struct pf_rule *, int); -int pfctl_show_rules(int, int, int, char *); +int pfctl_show_rules(int, char *, int, enum pfctl_show, char *, int); int pfctl_show_nat(int, int, char *); int pfctl_show_src_nodes(int, int); int pfctl_show_states(int, const char *, int); @@ -84,20 +87,29 
@@ int pfctl_show_status(int, int); int pfctl_show_timeouts(int, int); int pfctl_show_limits(int, int); void pfctl_debug(int, u_int32_t, int); -int pfctl_clear_rule_counters(int, int); int pfctl_test_altqsupport(int, int); int pfctl_show_anchors(int, int, char *); +int pfctl_ruleset_trans(struct pfctl *, char *, struct pf_anchor *); +int pfctl_load_ruleset(struct pfctl *, char *, + struct pf_ruleset *, int, int); +int pfctl_load_rule(struct pfctl *, char *, struct pf_rule *, int); const char *pfctl_lookup_option(char *, const char **); +struct pf_anchor_global pf_anchors; +struct pf_anchor pf_main_anchor; + const char *clearopt; char *rulesopt; const char *showopt; const char *debugopt; char *anchoropt; +const char *optiopt = NULL; char *pf_device = "/dev/pf"; char *ifaceopt; char *tableopt; const char *tblcmdopt; +int src_node_killers; +char *src_node_kill[2]; int state_killers; char *state_kill[2]; int loadopt; @@ -109,14 +121,25 @@ int labels = 0; const char *infile; +#define INDENT(d, o) do { \ + if (o) { \ + int i; \ + for (i=0; i < d; i++) \ + printf(" "); \ + } \ + } while (0); \ + + static const struct { const char *name; int index; } pf_limits[] = { - { "states", PF_LIMIT_STATES }, - { "src-nodes", PF_LIMIT_SRC_NODES }, - { "frags", PF_LIMIT_FRAGS }, - { NULL, 0 } + { "states", PF_LIMIT_STATES }, + { "src-nodes", PF_LIMIT_SRC_NODES }, + { "frags", PF_LIMIT_FRAGS }, + { "tables", PF_LIMIT_TABLES }, + { "table-entries", PF_LIMIT_TABLE_ENTRIES }, + { NULL, 0 } }; struct pf_hint { @@ -189,27 +212,28 @@ static const char *showopt_list[] = { static const char *tblcmdopt_list[] = { "kill", "flush", "add", "delete", "load", "replace", "show", - "test", "zero", NULL + "test", "zero", "expire", NULL }; static const char *debugopt_list[] = { "none", "urgent", "misc", "loud", NULL }; +static const char *optiopt_list[] = { + "o", "none", "basic", "profile", NULL +}; void usage(void) { extern char *__progname; - fprintf(stderr, "usage: %s [-AdeghmNnOoqRrvz] ", 
__progname); + fprintf(stderr, "usage: %s [-AdeghmNnOqRrvz] ", __progname); fprintf(stderr, "[-a anchor] [-D macro=value] [-F modifier]\n"); - fprintf(stderr, " "); - fprintf(stderr, "[-f file] [-i interface] [-k host] "); - fprintf(stderr, "[-p device] [-s modifier]\n"); - fprintf(stderr, " "); - fprintf(stderr, "[-t table -T command [address ...]] "); - fprintf(stderr, "[-x level]\n"); + fprintf(stderr, "\t[-f file] [-i interface] [-K host | network] "); + fprintf(stderr, "[-k host | network ]\n"); + fprintf(stderr, "\t[-o [level]] [-p device] [-s modifier ]\n"); + fprintf(stderr, "\t[-t table -T command [address ...]] [-x level]\n"); exit(1); } @@ -268,7 +292,7 @@ pfctl_clear_interface_flags(int dev, int opts) if ((opts & PF_OPT_NOACTION) == 0) { bzero(&pi, sizeof(pi)); - pi.pfiio_flags = PFI_IFLAG_SETABLE_MASK; + pi.pfiio_flags = PFI_IFLAG_SKIP; if (ioctl(dev, DIOCCLRIFFLAG, &pi)) err(1, "DIOCCLRIFFLAG"); @@ -358,6 +382,163 @@ pfctl_clear_states(int dev, const char *iface, int opts) return (0); } +void +pfctl_addrprefix(char *addr, struct pf_addr *mask) +{ + char *p; + const char *errstr; + int prefix, ret_ga, q, r; + struct addrinfo hints, *res; + + if ((p = strchr(addr, '/')) == NULL) + return; + + *p++ = '\0'; + prefix = strtonum(p, 0, 128, &errstr); + if (errstr) + errx(1, "prefix is %s: %s", errstr, p); + + bzero(&hints, sizeof(hints)); + /* prefix only with numeric addresses */ + hints.ai_flags |= AI_NUMERICHOST; + + if ((ret_ga = getaddrinfo(addr, NULL, &hints, &res))) { + errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); + /* NOTREACHED */ + } + + if (res->ai_family == AF_INET && prefix > 32) + errx(1, "prefix too long for AF_INET"); + else if (res->ai_family == AF_INET6 && prefix > 128) + errx(1, "prefix too long for AF_INET6"); + + q = prefix >> 3; + r = prefix & 7; + switch (res->ai_family) { + case AF_INET: + bzero(&mask->v4, sizeof(mask->v4)); + mask->v4.s_addr = htonl((u_int32_t) + (0xffffffffffULL << (32 - prefix))); + break; + case AF_INET6: + 
bzero(&mask->v6, sizeof(mask->v6)); + if (q > 0) + memset((void *)&mask->v6, 0xff, q); + if (r > 0) + *((u_char *)&mask->v6 + q) = + (0xff00 >> r) & 0xff; + break; + } + freeaddrinfo(res); +} + +int +pfctl_kill_src_nodes(int dev, const char *iface, int opts) +{ + struct pfioc_src_node_kill psnk; + struct addrinfo *res[2], *resp[2]; + struct sockaddr last_src, last_dst; + int killed, sources, dests; + int ret_ga; + + killed = sources = dests = 0; + + memset(&psnk, 0, sizeof(psnk)); + memset(&psnk.psnk_src.addr.v.a.mask, 0xff, + sizeof(psnk.psnk_src.addr.v.a.mask)); + memset(&last_src, 0xff, sizeof(last_src)); + memset(&last_dst, 0xff, sizeof(last_dst)); + + pfctl_addrprefix(src_node_kill[0], &psnk.psnk_src.addr.v.a.mask); + + if ((ret_ga = getaddrinfo(src_node_kill[0], NULL, NULL, &res[0]))) { + errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); + /* NOTREACHED */ + } + for (resp[0] = res[0]; resp[0]; resp[0] = resp[0]->ai_next) { + if (resp[0]->ai_addr == NULL) + continue; + /* We get lots of duplicates. 
Catch the easy ones */ + if (memcmp(&last_src, resp[0]->ai_addr, sizeof(last_src)) == 0) + continue; + last_src = *(struct sockaddr *)resp[0]->ai_addr; + + psnk.psnk_af = resp[0]->ai_family; + sources++; + + if (psnk.psnk_af == AF_INET) + psnk.psnk_src.addr.v.a.addr.v4 = + ((struct sockaddr_in *)resp[0]->ai_addr)->sin_addr; + else if (psnk.psnk_af == AF_INET6) + psnk.psnk_src.addr.v.a.addr.v6 = + ((struct sockaddr_in6 *)resp[0]->ai_addr)-> + sin6_addr; + else + errx(1, "Unknown address family %d", psnk.psnk_af); + + if (src_node_killers > 1) { + dests = 0; + memset(&psnk.psnk_dst.addr.v.a.mask, 0xff, + sizeof(psnk.psnk_dst.addr.v.a.mask)); + memset(&last_dst, 0xff, sizeof(last_dst)); + pfctl_addrprefix(src_node_kill[1], + &psnk.psnk_dst.addr.v.a.mask); + if ((ret_ga = getaddrinfo(src_node_kill[1], NULL, NULL, + &res[1]))) { + errx(1, "getaddrinfo: %s", + gai_strerror(ret_ga)); + /* NOTREACHED */ + } + for (resp[1] = res[1]; resp[1]; + resp[1] = resp[1]->ai_next) { + if (resp[1]->ai_addr == NULL) + continue; + if (psnk.psnk_af != resp[1]->ai_family) + continue; + + if (memcmp(&last_dst, resp[1]->ai_addr, + sizeof(last_dst)) == 0) + continue; + last_dst = *(struct sockaddr *)resp[1]->ai_addr; + + dests++; + + if (psnk.psnk_af == AF_INET) + psnk.psnk_dst.addr.v.a.addr.v4 = + ((struct sockaddr_in *)resp[1]-> + ai_addr)->sin_addr; + else if (psnk.psnk_af == AF_INET6) + psnk.psnk_dst.addr.v.a.addr.v6 = + ((struct sockaddr_in6 *)resp[1]-> + ai_addr)->sin6_addr; + else + errx(1, "Unknown address family %d", + psnk.psnk_af); + + if (ioctl(dev, DIOCKILLSRCNODES, &psnk)) + err(1, "DIOCKILLSRCNODES"); + killed += psnk.psnk_af; + /* fixup psnk.psnk_af */ + psnk.psnk_af = resp[1]->ai_family; + } + freeaddrinfo(res[1]); + } else { + if (ioctl(dev, DIOCKILLSRCNODES, &psnk)) + err(1, "DIOCKILLSRCNODES"); + killed += psnk.psnk_af; + /* fixup psnk.psnk_af */ + psnk.psnk_af = res[0]->ai_family; + } + } + + freeaddrinfo(res[0]); + + if ((opts & PF_OPT_QUIET) == 0) + fprintf(stderr, 
"killed %d src nodes from %d sources and %d " + "destinations\n", killed, sources, dests); + return (0); +} + int pfctl_kill_states(int dev, const char *iface, int opts) { @@ -378,6 +559,8 @@ pfctl_kill_states(int dev, const char *iface, int opts) sizeof(psk.psk_ifname)) >= sizeof(psk.psk_ifname)) errx(1, "invalid interface: %s", iface); + pfctl_addrprefix(state_kill[0], &psk.psk_src.addr.v.a.mask); + if ((ret_ga = getaddrinfo(state_kill[0], NULL, NULL, &res[0]))) { errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); /* NOTREACHED */ @@ -408,6 +591,8 @@ pfctl_kill_states(int dev, const char *iface, int opts) memset(&psk.psk_dst.addr.v.a.mask, 0xff, sizeof(psk.psk_dst.addr.v.a.mask)); memset(&last_dst, 0xff, sizeof(last_dst)); + pfctl_addrprefix(state_kill[1], + &psk.psk_dst.addr.v.a.mask); if ((ret_ga = getaddrinfo(state_kill[1], NULL, NULL, &res[1]))) { errx(1, "getaddrinfo: %s", @@ -500,6 +685,17 @@ pfctl_get_pool(int dev, struct pf_pool *pool, u_int32_t nr, } void +pfctl_move_pool(struct pf_pool *src, struct pf_pool *dst) +{ + struct pf_pooladdr *pa; + + while ((pa = TAILQ_FIRST(&src->list)) != NULL) { + TAILQ_REMOVE(&src->list, pa, entries); + TAILQ_INSERT_TAIL(&dst->list, pa, entries); + } +} + +void pfctl_clear_pool(struct pf_pool *pool) { struct pf_pooladdr *pa; @@ -533,12 +729,18 @@ pfctl_print_rule_counters(struct pf_rule *rule, int opts) printf(" [ queue: qname=%s qid=%u pqname=%s pqid=%u ]\n", rule->qname, rule->qid, rule->pqname, rule->pqid); } - if (opts & PF_OPT_VERBOSE) + if (opts & PF_OPT_VERBOSE) { printf(" [ Evaluations: %-8llu Packets: %-8llu " "Bytes: %-10llu States: %-6u]\n", (unsigned long long)rule->evaluations, - (unsigned long long)rule->packets, - (unsigned long long)rule->bytes, rule->states); + (unsigned long long)(rule->packets[0] + + rule->packets[1]), + (unsigned long long)(rule->bytes[0] + + rule->bytes[1]), rule->states); + if (!(opts & PF_OPT_DEBUG)) + printf(" [ Inserted: uid %u pid %u ]\n", + (unsigned)rule->cuid, 
(unsigned)rule->cpid); + } } void @@ -551,99 +753,160 @@ pfctl_print_title(char *title) } int -pfctl_show_rules(int dev, int opts, int format, char *anchorname) +pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format, + char *anchorname, int depth) { struct pfioc_rule pr; u_int32_t nr, mnr, header = 0; int rule_numbers = opts & (PF_OPT_VERBOSE2 | PF_OPT_DEBUG); + int len = strlen(path); + int brace; + char *p; + + if (path[0]) + snprintf(&path[len], MAXPATHLEN - len, "/%s", anchorname); + else + snprintf(&path[len], MAXPATHLEN - len, "%s", anchorname); memset(&pr, 0, sizeof(pr)); - memcpy(pr.anchor, anchorname, sizeof(pr.anchor)); + memcpy(pr.anchor, path, sizeof(pr.anchor)); if (opts & PF_OPT_SHOWALL) { pr.rule.action = PF_PASS; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); - return (-1); + goto error; } header++; } pr.rule.action = PF_SCRUB; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); - return (-1); + goto error; } if (opts & PF_OPT_SHOWALL) { - if (format == 0 && (pr.nr > 0 || header)) + if (format == PFCTL_SHOW_RULES && (pr.nr > 0 || header)) pfctl_print_title("FILTER RULES:"); - else if (format == 1 && labels) + else if (format == PFCTL_SHOW_LABELS && labels) pfctl_print_title("LABEL COUNTERS:"); } mnr = pr.nr; + if (opts & PF_OPT_CLRRULECTRS) + pr.action = PF_GET_CLR_CNTR; + for (nr = 0; nr < mnr; ++nr) { pr.nr = nr; if (ioctl(dev, DIOCGETRULE, &pr)) { warn("DIOCGETRULE"); - return (-1); + goto error; } if (pfctl_get_pool(dev, &pr.rule.rpool, - nr, pr.ticket, PF_SCRUB, anchorname) != 0) - return (-1); + nr, pr.ticket, PF_SCRUB, path) != 0) + goto error; switch (format) { - case 1: + case PFCTL_SHOW_LABELS: if (pr.rule.label[0]) { printf("%s ", pr.rule.label); - printf("%llu %llu %llu\n", + printf("%llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)pr.rule.evaluations, - (unsigned long long)pr.rule.packets, - (unsigned long long)pr.rule.bytes); + (unsigned long long)(pr.rule.packets[0] + + 
pr.rule.packets[1]), + (unsigned long long)(pr.rule.bytes[0] + + pr.rule.bytes[1]), + (unsigned long long)pr.rule.packets[0], + (unsigned long long)pr.rule.bytes[0], + (unsigned long long)pr.rule.packets[1], + (unsigned long long)pr.rule.bytes[1]); } break; - default: + case PFCTL_SHOW_RULES: if (pr.rule.label[0] && (opts & PF_OPT_SHOWALL)) labels = 1; print_rule(&pr.rule, pr.anchor_call, rule_numbers); + printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); + break; + case PFCTL_SHOW_NOTHING: + break; } pfctl_clear_pool(&pr.rule.rpool); } pr.rule.action = PF_PASS; if (ioctl(dev, DIOCGETRULES, &pr)) { warn("DIOCGETRULES"); - return (-1); + goto error; } mnr = pr.nr; for (nr = 0; nr < mnr; ++nr) { pr.nr = nr; if (ioctl(dev, DIOCGETRULE, &pr)) { warn("DIOCGETRULE"); - return (-1); + goto error; } if (pfctl_get_pool(dev, &pr.rule.rpool, - nr, pr.ticket, PF_PASS, anchorname) != 0) - return (-1); + nr, pr.ticket, PF_PASS, path) != 0) + goto error; switch (format) { - case 1: + case PFCTL_SHOW_LABELS: if (pr.rule.label[0]) { printf("%s ", pr.rule.label); - printf("%llu %llu %llu\n", + printf("%llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)pr.rule.evaluations, - (unsigned long long)pr.rule.packets, - (unsigned long long)pr.rule.bytes); + (unsigned long long)(pr.rule.packets[0] + + pr.rule.packets[1]), + (unsigned long long)(pr.rule.bytes[0] + + pr.rule.bytes[1]), + (unsigned long long)pr.rule.packets[0], + (unsigned long long)pr.rule.bytes[0], + (unsigned long long)pr.rule.packets[1], + (unsigned long long)pr.rule.bytes[1]); } break; - default: + case PFCTL_SHOW_RULES: + brace = 0; if (pr.rule.label[0] && (opts & PF_OPT_SHOWALL)) labels = 1; - print_rule(&pr.rule, pr.anchor_call, rule_numbers); + INDENT(depth, !(opts & PF_OPT_VERBOSE)); + if (pr.anchor_call[0] && + ((((p = strrchr(pr.anchor_call, '_')) != NULL) && + ((void *)p == (void *)pr.anchor_call || + *(--p) == '/')) || (opts & PF_OPT_RECURSE))) { + brace++; + if ((p = strrchr(pr.anchor_call, '/')) != 
+ NULL) + p++; + else + p = &pr.anchor_call[0]; + } else + p = &pr.anchor_call[0]; + + print_rule(&pr.rule, p, rule_numbers); + if (brace) + printf(" {\n"); + else + printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); + if (brace) { + pfctl_show_rules(dev, path, opts, format, + p, depth + 1); + INDENT(depth, !(opts & PF_OPT_VERBOSE)); + printf("}\n"); + } + break; + case PFCTL_SHOW_NOTHING: + break; } pfctl_clear_pool(&pr.rule.rpool); } + path[len] = '\0'; return (0); + + error: + path[len] = '\0'; + return (-1); } int @@ -678,6 +941,7 @@ pfctl_show_nat(int dev, int opts, char *anchorname) } print_rule(&pr.rule, pr.anchor_call, opts & PF_OPT_VERBOSE2); + printf("\n"); pfctl_print_rule_counters(&pr.rule, opts); pfctl_clear_pool(&pr.rule.rpool); } @@ -705,16 +969,17 @@ pfctl_show_src_nodes(int dev, int opts) } if (ioctl(dev, DIOCGETSRCNODES, &psn) < 0) { warn("DIOCGETSRCNODES"); + free(inbuf); return (-1); } if (psn.psn_len + sizeof(struct pfioc_src_nodes) < len) break; if (len == 0 && psn.psn_len == 0) - return (0); + goto done; if (len == 0 && psn.psn_len != 0) len = psn.psn_len; if (psn.psn_len == 0) - return (0); /* no src_nodes */ + goto done; /* no src_nodes */ len *= 2; } p = psn.psn_src_nodes; @@ -724,6 +989,8 @@ pfctl_show_src_nodes(int dev, int opts) print_src_node(p, opts); p++; } +done: + free(inbuf); return (0); } @@ -747,16 +1014,17 @@ pfctl_show_states(int dev, const char *iface, int opts) } if (ioctl(dev, DIOCGETSTATES, &ps) < 0) { warn("DIOCGETSTATES"); + free(inbuf); return (-1); } if (ps.ps_len + sizeof(struct pfioc_states) < len) break; if (len == 0 && ps.ps_len == 0) - return (0); + goto done; if (len == 0 && ps.ps_len != 0) len = ps.ps_len; if (ps.ps_len == 0) - return (0); /* no states */ + goto done; /* no states */ len *= 2; } p = ps.ps_states; @@ -769,6 +1037,8 @@ pfctl_show_states(int dev, const char *iface, int opts) } print_state(p, opts); } +done: + free(inbuf); return (0); } @@ -825,11 +1095,11 @@ pfctl_show_limits(int dev, int 
opts) pl.index = pf_limits[i].index; if (ioctl(dev, DIOCGETLIMIT, &pl)) err(1, "DIOCGETLIMIT"); - printf("%-10s ", pf_limits[i].name); + printf("%-13s ", pf_limits[i].name); if (pl.limit == UINT_MAX) printf("unlimited\n"); else - printf("hard limit %6u\n", pl.limit); + printf("hard limit %8u\n", pl.limit); } return (0); } @@ -860,93 +1130,186 @@ int pfctl_add_rule(struct pfctl *pf, struct pf_rule *r, const char *anchor_call) { u_int8_t rs_num; - struct pfioc_rule pr; + struct pf_rule *rule; + struct pf_ruleset *rs; + char *p; - switch (r->action) { - case PF_SCRUB: - case PF_NOSCRUB: - if ((loadopt & PFCTL_FLAG_FILTER) == 0) - return (0); - rs_num = PF_RULESET_SCRUB; - break; - case PF_DROP: - case PF_PASS: - if ((loadopt & PFCTL_FLAG_FILTER) == 0) - return (0); - rs_num = PF_RULESET_FILTER; - break; - case PF_NAT: - case PF_NONAT: - if ((loadopt & PFCTL_FLAG_NAT) == 0) - return (0); - rs_num = PF_RULESET_NAT; - break; - case PF_RDR: - case PF_NORDR: - if ((loadopt & PFCTL_FLAG_NAT) == 0) - return (0); - rs_num = PF_RULESET_RDR; - break; - case PF_BINAT: - case PF_NOBINAT: - if ((loadopt & PFCTL_FLAG_NAT) == 0) - return (0); - rs_num = PF_RULESET_BINAT; - break; - default: + rs_num = pf_get_ruleset_number(r->action); + if (rs_num == PF_RULESET_MAX) errx(1, "Invalid rule type %d", r->action); - break; - } + rs = &pf->anchor->ruleset; - if ((pf->opts & PF_OPT_OPTIMIZE) && rs_num == PF_RULESET_FILTER) { - /* - * We'll do an optimization post-pass before finally adding the - * rules. Then we'll disable the optimization flag and feed - * the rules right back into this function. + if (anchor_call[0] && r->anchor == NULL) { + /* + * Don't make non-brace anchors part of the main anchor pool. 
*/ - struct pf_opt_rule *pfr; - struct pf_pooladdr *pa; + if ((r->anchor = calloc(1, sizeof(*r->anchor))) == NULL) + err(1, "pfctl_add_rule: calloc"); + + pf_init_ruleset(&r->anchor->ruleset); + r->anchor->ruleset.anchor = r->anchor; + if (strlcpy(r->anchor->path, anchor_call, + sizeof(rule->anchor->path)) >= sizeof(rule->anchor->path)) + errx(1, "pfctl_add_rule: strlcpy"); + if ((p = strrchr(anchor_call, '/')) != NULL) { + if (!strlen(p)) + err(1, "pfctl_add_rule: bad anchor name %s", + anchor_call); + } else + p = (char *)anchor_call; + if (strlcpy(r->anchor->name, p, + sizeof(rule->anchor->name)) >= sizeof(rule->anchor->name)) + errx(1, "pfctl_add_rule: strlcpy"); + } - if ((pfr = calloc(1, sizeof(*pfr))) == NULL) - err(1, "calloc"); - memcpy(&pfr->por_rule, r, sizeof(*r)); - if (strlcpy(pfr->por_anchor, anchor_call, - sizeof(pfr->por_anchor)) >= sizeof(pfr->por_anchor)) - errx(1, "pfctl_add_rule: strlcpy"); - TAILQ_INSERT_TAIL(&pf->opt_queue, pfr, por_entry); - - if (TAILQ_FIRST(&r->rpool.list) != NULL) { - TAILQ_INIT(&pfr->por_rule.rpool.list); - while ((pa = TAILQ_FIRST(&r->rpool.list)) != NULL) { - TAILQ_REMOVE(&r->rpool.list, pa, entries); - TAILQ_INSERT_TAIL(&pfr->por_rule.rpool.list, pa, - entries); + if ((rule = calloc(1, sizeof(*rule))) == NULL) + err(1, "calloc"); + bcopy(r, rule, sizeof(*rule)); + TAILQ_INIT(&rule->rpool.list); + pfctl_move_pool(&r->rpool, &rule->rpool); + + TAILQ_INSERT_TAIL(rs->rules[rs_num].active.ptr, rule, entries); + return (0); +} + +int +pfctl_ruleset_trans(struct pfctl *pf, char *path, struct pf_anchor *a) +{ + int osize = pf->trans->pfrb_size; + + if ((pf->loadopt & PFCTL_FLAG_NAT) != 0) { + if (pfctl_add_trans(pf->trans, PF_RULESET_NAT, path) || + pfctl_add_trans(pf->trans, PF_RULESET_BINAT, path) || + pfctl_add_trans(pf->trans, PF_RULESET_RDR, path)) + return (1); + } + if (a == pf->astack[0] && ((altqsupport && + (pf->loadopt & PFCTL_FLAG_ALTQ) != 0))) { + if (pfctl_add_trans(pf->trans, PF_RULESET_ALTQ, path)) + return 
(2); + } + if ((pf->loadopt & PFCTL_FLAG_FILTER) != 0) { + if (pfctl_add_trans(pf->trans, PF_RULESET_SCRUB, path) || + pfctl_add_trans(pf->trans, PF_RULESET_FILTER, path)) + return (3); + } + if (pf->loadopt & PFCTL_FLAG_TABLE) + if (pfctl_add_trans(pf->trans, PF_RULESET_TABLE, path)) + return (4); + if (pfctl_trans(pf->dev, pf->trans, DIOCXBEGIN, osize)) + return (5); + + return (0); +} + +int +pfctl_load_ruleset(struct pfctl *pf, char *path, struct pf_ruleset *rs, + int rs_num, int depth) +{ + struct pf_rule *r; + int error, len = strlen(path); + int brace = 0; + + pf->anchor = rs->anchor; + + if (path[0]) + snprintf(&path[len], MAXPATHLEN - len, "/%s", pf->anchor->name); + else + snprintf(&path[len], MAXPATHLEN - len, "%s", pf->anchor->name); + + if (depth) { + if (TAILQ_FIRST(rs->rules[rs_num].active.ptr) != NULL) { + brace++; + if (pf->opts & PF_OPT_VERBOSE) + printf(" {\n"); + if ((pf->opts & PF_OPT_NOACTION) == 0 && + (error = pfctl_ruleset_trans(pf, + path, rs->anchor))) { + printf("pfctl_load_rulesets: " + "pfctl_ruleset_trans %d\n", error); + goto error; } - } else { - memset(&pfr->por_rule.rpool, 0, - sizeof(pfr->por_rule.rpool)); + } else if (pf->opts & PF_OPT_VERBOSE) + printf("\n"); - } - return (0); } + if (pf->optimize && rs_num == PF_RULESET_FILTER) + pfctl_optimize_ruleset(pf, rs); + + while ((r = TAILQ_FIRST(rs->rules[rs_num].active.ptr)) != NULL) { + TAILQ_REMOVE(rs->rules[rs_num].active.ptr, r, entries); + if ((error = pfctl_load_rule(pf, path, r, depth))) + goto error; + if (r->anchor) { + if ((error = pfctl_load_ruleset(pf, path, + &r->anchor->ruleset, rs_num, depth + 1))) + goto error; + } else if (pf->opts & PF_OPT_VERBOSE) + printf("\n"); + free(r); + } + if (brace && pf->opts & PF_OPT_VERBOSE) { + INDENT(depth - 1, (pf->opts & PF_OPT_VERBOSE)); + printf("}\n"); + } + path[len] = '\0'; + return (0); + + error: + path[len] = '\0'; + return (error); + +} + +int +pfctl_load_rule(struct pfctl *pf, char *path, struct pf_rule *r, int depth) +{ + 
u_int8_t rs_num = pf_get_ruleset_number(r->action); + char *name; + struct pfioc_rule pr; + int len = strlen(path); + + bzero(&pr, sizeof(pr)); + /* set up anchor before adding to path for anchor_call */ + if ((pf->opts & PF_OPT_NOACTION) == 0) + pr.ticket = pfctl_get_ticket(pf->trans, rs_num, path); + if (strlcpy(pr.anchor, path, sizeof(pr.anchor)) >= sizeof(pr.anchor)) + errx(1, "pfctl_load_rule: strlcpy"); + + if (r->anchor) { + if (r->anchor->match) { + if (path[0]) + snprintf(&path[len], MAXPATHLEN - len, + "/%s", r->anchor->name); + else + snprintf(&path[len], MAXPATHLEN - len, + "%s", r->anchor->name); + name = path; + } else + name = r->anchor->path; + } else + name = ""; + if ((pf->opts & PF_OPT_NOACTION) == 0) { - bzero(&pr, sizeof(pr)); - if (strlcpy(pr.anchor, pf->anchor, sizeof(pr.anchor)) >= - sizeof(pr.anchor)) - errx(1, "pfctl_add_rule: strlcpy"); if (pfctl_add_pool(pf, &r->rpool, r->af)) return (1); - pr.ticket = pfctl_get_ticket(pf->trans, rs_num, pf->anchor); pr.pool_ticket = pf->paddr.ticket; memcpy(&pr.rule, r, sizeof(pr.rule)); - strlcpy(pr.anchor_call, anchor_call, sizeof(pr.anchor_call)); + if (r->anchor && strlcpy(pr.anchor_call, name, + sizeof(pr.anchor_call)) >= sizeof(pr.anchor_call)) + errx(1, "pfctl_load_rule: strlcpy"); if (ioctl(pf->dev, DIOCADDRULE, &pr)) err(1, "DIOCADDRULE"); } - if (pf->opts & PF_OPT_VERBOSE) - print_rule(r, anchor_call, pf->opts & PF_OPT_VERBOSE2); + + if (pf->opts & PF_OPT_VERBOSE) { + INDENT(depth, !(pf->opts & PF_OPT_VERBOSE2)); + print_rule(r, r->anchor ? 
r->anchor->name : "", + pf->opts & PF_OPT_VERBOSE2); + } + path[len] = '\0'; pfctl_clear_pool(&r->rpool); return (0); } @@ -974,86 +1337,86 @@ pfctl_add_altq(struct pfctl *pf, struct pf_altq *a) } int -pfctl_rules(int dev, char *filename, int opts, char *anchorname, - struct pfr_buffer *trans) +pfctl_rules(int dev, char *filename, FILE *fin, int opts, int optimize, + char *anchorname, struct pfr_buffer *trans) { #define ERR(x) do { warn(x); goto _error; } while(0) #define ERRX(x) do { warnx(x); goto _error; } while(0) - FILE *fin; struct pfr_buffer *t, buf; struct pfioc_altq pa; struct pfctl pf; + struct pf_ruleset *rs; struct pfr_table trs; + char *path; int osize; + RB_INIT(&pf_anchors); + memset(&pf_main_anchor, 0, sizeof(pf_main_anchor)); + pf_init_ruleset(&pf_main_anchor.ruleset); + pf_main_anchor.ruleset.anchor = &pf_main_anchor; if (trans == NULL) { - bzero(&buf, sizeof(buf)); - buf.pfrb_type = PFRB_TRANS; - t = &buf; - osize = 0; + bzero(&buf, sizeof(buf)); + buf.pfrb_type = PFRB_TRANS; + t = &buf; + osize = 0; } else { - t = trans; - osize = t->pfrb_size; + t = trans; + osize = t->pfrb_size; } memset(&pa, 0, sizeof(pa)); memset(&pf, 0, sizeof(pf)); memset(&trs, 0, sizeof(trs)); + if ((path = calloc(1, MAXPATHLEN)) == NULL) + ERRX("pfctl_rules: calloc"); if (strlcpy(trs.pfrt_anchor, anchorname, sizeof(trs.pfrt_anchor)) >= sizeof(trs.pfrt_anchor)) ERRX("pfctl_rules: strlcpy"); - if (strcmp(filename, "-") == 0) { - fin = stdin; - infile = "stdin"; - } else { - if ((fin = pfctl_fopen(filename, "r")) == NULL) { - warn("%s", filename); - return (1); - } - infile = filename; - } + infile = filename; pf.dev = dev; pf.opts = opts; + pf.optimize = optimize; pf.loadopt = loadopt; + + /* non-brace anchor, create without resolving the path */ + if ((pf.anchor = calloc(1, sizeof(*pf.anchor))) == NULL) + ERRX("pfctl_rules: calloc"); + rs = &pf.anchor->ruleset; + pf_init_ruleset(rs); + rs->anchor = pf.anchor; + if (strlcpy(pf.anchor->path, anchorname, + 
sizeof(pf.anchor->path)) >= sizeof(pf.anchor->path)) + errx(1, "pfctl_add_rule: strlcpy"); + if (strlcpy(pf.anchor->name, anchorname, + sizeof(pf.anchor->name)) >= sizeof(pf.anchor->name)) + errx(1, "pfctl_add_rule: strlcpy"); + + + pf.astack[0] = pf.anchor; + pf.asd = 0; if (anchorname[0]) pf.loadopt &= ~PFCTL_FLAG_ALTQ; pf.paltq = &pa; pf.trans = t; - pf.rule_nr = 0; - pf.anchor = anchorname; - TAILQ_INIT(&pf.opt_queue); pfctl_init_options(&pf); if ((opts & PF_OPT_NOACTION) == 0) { - if ((pf.loadopt & PFCTL_FLAG_NAT) != 0) { - if (pfctl_add_trans(t, PF_RULESET_NAT, anchorname) || - pfctl_add_trans(t, PF_RULESET_BINAT, anchorname) || - pfctl_add_trans(t, PF_RULESET_RDR, anchorname)) - ERR("pfctl_rules"); - } - if (((altqsupport && (pf.loadopt & PFCTL_FLAG_ALTQ) != 0))) { - if (pfctl_add_trans(t, PF_RULESET_ALTQ, anchorname)) - ERR("pfctl_rules"); - } - if ((pf.loadopt & PFCTL_FLAG_FILTER) != 0) { - if (pfctl_add_trans(t, PF_RULESET_SCRUB, anchorname) || - pfctl_add_trans(t, PF_RULESET_FILTER, anchorname)) - ERR("pfctl_rules"); - } - if (pf.loadopt & PFCTL_FLAG_TABLE) { - if (pfctl_add_trans(t, PF_RULESET_TABLE, anchorname)) - ERR("pfctl_rules"); - } - if (pfctl_trans(dev, t, DIOCXBEGIN, osize)) - ERR("DIOCXBEGIN"); + /* + * XXX For the time being we need to open transactions for + * the main ruleset before parsing, because tables are still + * loaded at parse time. 
+ */ + if (pfctl_ruleset_trans(&pf, anchorname, pf.anchor)) + ERRX("pfctl_rules"); if (altqsupport && (pf.loadopt & PFCTL_FLAG_ALTQ)) - pa.ticket = pfctl_get_ticket(t, PF_RULESET_ALTQ, - anchorname); + pa.ticket = + pfctl_get_ticket(t, PF_RULESET_ALTQ, anchorname); if (pf.loadopt & PFCTL_FLAG_TABLE) - pf.tticket = pfctl_get_ticket(t, PF_RULESET_TABLE, - anchorname); + pf.astack[0]->ruleset.tticket = + pfctl_get_ticket(t, PF_RULESET_TABLE, anchorname); } + if (parse_rules(fin, &pf) < 0) { if ((opts & PF_OPT_NOACTION) == 0) ERRX("Syntax error in config file: " @@ -1061,9 +1424,19 @@ pfctl_rules(int dev, char *filename, int opts, char *anchorname, else goto _error; } - if (pf.opts & PF_OPT_OPTIMIZE) { - if (pfctl_optimize_rules(&pf)) - ERRX("Failed to optimize ruleset: pf rules not loaded"); + + if ((pf.loadopt & PFCTL_FLAG_FILTER && + (pfctl_load_ruleset(&pf, path, rs, PF_RULESET_SCRUB, 0))) || + (pf.loadopt & PFCTL_FLAG_NAT && + (pfctl_load_ruleset(&pf, path, rs, PF_RULESET_NAT, 0) || + pfctl_load_ruleset(&pf, path, rs, PF_RULESET_RDR, 0) || + pfctl_load_ruleset(&pf, path, rs, PF_RULESET_BINAT, 0))) || + (pf.loadopt & PFCTL_FLAG_FILTER && + pfctl_load_ruleset(&pf, path, rs, PF_RULESET_FILTER, 0))) { + if ((opts & PF_OPT_NOACTION) == 0) + ERRX("Unable to load rules into kernel"); + else + goto _error; } if ((altqsupport && (pf.loadopt & PFCTL_FLAG_ALTQ) != 0)) @@ -1077,14 +1450,14 @@ pfctl_rules(int dev, char *filename, int opts, char *anchorname, /* process "load anchor" directives */ if (!anchorname[0]) - if (pfctl_load_anchors(dev, opts, t) == -1) + if (pfctl_load_anchors(dev, &pf, t) == -1) ERRX("load anchors"); if (trans == NULL && (opts & PF_OPT_NOACTION) == 0) { if (!anchorname[0]) if (pfctl_load_options(&pf)) goto _error; - if (pfctl_trans(dev, t, DIOCXCOMMIT, 0)) + if (pfctl_trans(dev, t, DIOCXCOMMIT, osize)) ERR("DIOCXCOMMIT"); } return (0); @@ -1092,7 +1465,7 @@ pfctl_rules(int dev, char *filename, int opts, char *anchorname, _error: if (trans == NULL) { 
/* main ruleset */ if ((opts & PF_OPT_NOACTION) == 0) - if (pfctl_trans(dev, t, DIOCXROLLBACK, 0)) + if (pfctl_trans(dev, t, DIOCXROLLBACK, osize)) err(1, "DIOCXROLLBACK"); exit(1); } else { /* sub ruleset */ @@ -1129,6 +1502,9 @@ pfctl_fopen(const char *name, const char *mode) void pfctl_init_options(struct pfctl *pf) { + int mib[2], mem; + size_t size; + pf->timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; pf->timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; pf->timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; @@ -1147,10 +1523,21 @@ pfctl_init_options(struct pfctl *pf) pf->timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; pf->timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; pf->timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; - - pf->limit[PF_LIMIT_STATES] = PFSTATE_HIWAT; - pf->limit[PF_LIMIT_FRAGS] = PFFRAG_FRENT_HIWAT; - pf->limit[PF_LIMIT_SRC_NODES] = PFSNODE_HIWAT; + pf->timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + pf->timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; + + pf->limit[PF_LIMIT_STATES] = PFSTATE_HIWAT; + pf->limit[PF_LIMIT_FRAGS] = PFFRAG_FRENT_HIWAT; + pf->limit[PF_LIMIT_SRC_NODES] = PFSNODE_HIWAT; + pf->limit[PF_LIMIT_TABLES] = PFR_KTABLE_HIWAT; + pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT; + + mib[0] = CTL_HW; + mib[1] = HW_PHYSMEM; + size = sizeof(mem); + (void) sysctl(mib, 2, &mem, &size, NULL, 0); + if (mem <= 100*1024*1024) + pf->limit[PF_LIMIT_TABLE_ENTRIES] = PFR_KENTRY_HIWAT_SMALL; pf->debug = PF_DEBUG_URGENT; } @@ -1171,6 +1558,21 @@ pfctl_load_options(struct pfctl *pf) error = 1; } + /* + * If we've set the limit, but havn't explicitly set adaptive + * timeouts, do it now with a start of 60% and end of 120%. 
+ */ + if (pf->limit_set[PF_LIMIT_STATES] && + !pf->timeout_set[PFTM_ADAPTIVE_START] && + !pf->timeout_set[PFTM_ADAPTIVE_END]) { + pf->timeout[PFTM_ADAPTIVE_START] = + (pf->limit[PF_LIMIT_STATES] / 10) * 6; + pf->timeout_set[PFTM_ADAPTIVE_START] = 1; + pf->timeout[PFTM_ADAPTIVE_END] = + (pf->limit[PF_LIMIT_STATES] / 10) * 12; + pf->timeout_set[PFTM_ADAPTIVE_END] = 1; + } + /* load timeouts */ for (i = 0; i < PFTM_MAX; i++) { if ((pf->opts & PF_OPT_MERGE) && !pf->timeout_set[i]) @@ -1297,7 +1699,7 @@ pfctl_set_optimization(struct pfctl *pf, const char *opt) hint = pf_hints[i].hint; if (hint == NULL) { - warnx("Bad hint name."); + warnx("invalid state timeouts optimization"); return (1); } @@ -1343,7 +1745,7 @@ pfctl_load_logif(struct pfctl *pf, char *ifname) memset(&pi, 0, sizeof(pi)); if (ifname && strlcpy(pi.ifname, ifname, sizeof(pi.ifname)) >= sizeof(pi.ifname)) { - warnx("pfctl_set_logif: strlcpy"); + warnx("pfctl_load_logif: strlcpy"); return (1); } if (ioctl(pf->dev, DIOCSETSTATUSIF, &pi)) { @@ -1480,16 +1882,6 @@ pfctl_debug(int dev, u_int32_t level, int opts) } int -pfctl_clear_rule_counters(int dev, int opts) -{ - if (ioctl(dev, DIOCCLRRULECTRS)) - err(1, "DIOCCLRRULECTRS"); - if ((opts & PF_OPT_QUIET) == 0) - fprintf(stderr, "pf: rule counters cleared\n"); - return (0); -} - -int pfctl_test_altqsupport(int dev, int opts) { struct pfioc_altq pa; @@ -1537,8 +1929,9 @@ pfctl_show_anchors(int dev, int opts, char *anchorname) strlcat(sub, "/", sizeof(sub)); } strlcat(sub, pr.name, sizeof(sub)); - printf(" %s\n", sub); - if (opts & PF_OPT_VERBOSE && pfctl_show_anchors(dev, opts, sub)) + if (sub[0] != '_' || (opts & PF_OPT_VERBOSE)) + printf(" %s\n", sub); + if ((opts & PF_OPT_VERBOSE) && pfctl_show_anchors(dev, opts, sub)) return (-1); } return (0); @@ -1557,17 +1950,20 @@ pfctl_lookup_option(char *cmd, const char **list) int main(int argc, char *argv[]) { - int error = 0; - int ch; - int mode = O_RDONLY; - int opts = 0; - char anchorname[MAXPATHLEN]; + int 
error = 0; + int ch; + int mode = O_RDONLY; + int opts = 0; + int optimize = 0; + char anchorname[MAXPATHLEN]; + char *path; + FILE *fin = NULL; if (argc < 2) usage(); while ((ch = getopt(argc, argv, - "a:AdD:eqf:F:ghi:k:mnNOop:rRs:t:T:vx:z")) != -1) { + "a:AdD:eqf:F:ghi:k:K:mnNOo::p:rRs:t:T:vx:z")) != -1) { switch (ch) { case 'a': anchoropt = optarg; @@ -1608,6 +2004,15 @@ main(int argc, char *argv[]) state_kill[state_killers++] = optarg; mode = O_RDWR; break; + case 'K': + if (src_node_killers >= 2) { + warnx("can only specify -K twice"); + usage(); + /* NOTREACHED */ + } + src_node_kill[src_node_killers++] = optarg; + mode = O_RDWR; + break; case 'm': opts |= PF_OPT_MERGE; break; @@ -1634,10 +2039,25 @@ main(int argc, char *argv[]) loadopt |= PFCTL_FLAG_FILTER; break; case 'o': - if (opts & PF_OPT_OPTIMIZE) - opts |= PF_OPT_OPTIMIZE_PROFILE; - else - opts |= PF_OPT_OPTIMIZE; + if (optarg) { + optiopt = pfctl_lookup_option(optarg, + optiopt_list); + if (optiopt == NULL) { + warnx("Unknown optimization '%s'", + optarg); + usage(); + } + } + if (opts & PF_OPT_OPTIMIZE) { + if (optiopt != NULL) { + warnx("Cannot specify -o multiple times" + "with optimizer level"); + usage(); + } + optimize |= PF_OPTIMIZE_PROFILE; + } + optimize |= PF_OPTIMIZE_BASIC; + opts |= PF_OPT_OPTIMIZE; break; case 'O': loadopt |= PFCTL_FLAG_OPTION; @@ -1695,7 +2115,7 @@ main(int argc, char *argv[]) loadopt |= PFCTL_FLAG_TABLE; tblcmdopt = NULL; } else - mode = strchr("acdfkrz", ch) ? O_RDWR : O_RDONLY; + mode = strchr("acdefkrz", ch) ? 
O_RDWR : O_RDONLY; } else if (argc != optind) { warnx("unknown command line argument: %s ...", argv[optind]); usage(); @@ -1704,8 +2124,19 @@ main(int argc, char *argv[]) if (loadopt == 0) loadopt = ~0; + if ((path = calloc(1, MAXPATHLEN)) == NULL) + errx(1, "pfctl: calloc"); memset(anchorname, 0, sizeof(anchorname)); if (anchoropt != NULL) { + int len = strlen(anchoropt); + + if (anchoropt[len - 1] == '*') { + if (len >= 2 && anchoropt[len - 2] == '/') + anchoropt[len - 2] = '\0'; + else + anchoropt[len - 1] = '\0'; + opts |= PF_OPT_RECURSE; + } if (strlcpy(anchorname, anchoropt, sizeof(anchorname)) >= sizeof(anchorname)) errx(1, "anchor name '%s' too long", @@ -1739,11 +2170,13 @@ main(int argc, char *argv[]) break; case 'r': pfctl_load_fingerprints(dev, opts); - pfctl_show_rules(dev, opts, 0, anchorname); + pfctl_show_rules(dev, path, opts, PFCTL_SHOW_RULES, + anchorname, 0); break; case 'l': pfctl_load_fingerprints(dev, opts); - pfctl_show_rules(dev, opts, 1, anchorname); + pfctl_show_rules(dev, path, opts, PFCTL_SHOW_LABELS, + anchorname, 0); break; case 'n': pfctl_load_fingerprints(dev, opts); @@ -1773,12 +2206,12 @@ main(int argc, char *argv[]) pfctl_load_fingerprints(dev, opts); pfctl_show_nat(dev, opts, anchorname); - pfctl_show_rules(dev, opts, 0, anchorname); + pfctl_show_rules(dev, path, opts, 0, anchorname, 0); pfctl_show_altq(dev, ifaceopt, opts, 0); pfctl_show_states(dev, ifaceopt, opts); pfctl_show_src_nodes(dev, opts); pfctl_show_status(dev, opts); - pfctl_show_rules(dev, opts, 1, anchorname); + pfctl_show_rules(dev, path, opts, 1, anchorname, 0); pfctl_show_timeouts(dev, opts); pfctl_show_limits(dev, opts); pfctl_show_tables(anchorname, opts); @@ -1797,7 +2230,15 @@ main(int argc, char *argv[]) } } + if ((opts & PF_OPT_CLRRULECTRS) && showopt == NULL) + pfctl_show_rules(dev, path, opts, PFCTL_SHOW_NOTHING, + anchorname, 0); + if (clearopt != NULL) { + if (anchorname[0] == '_' || strstr(anchorname, "/_") != NULL) + errx(1, "anchor names beginning 
with '_' cannot " + "be modified from the command line"); + switch (*clearopt) { case 'r': pfctl_clear_rules(dev, opts, anchorname); @@ -1841,13 +2282,40 @@ main(int argc, char *argv[]) if (state_killers) pfctl_kill_states(dev, ifaceopt, opts); + if (src_node_killers) + pfctl_kill_src_nodes(dev, ifaceopt, opts); + if (tblcmdopt != NULL) { error = pfctl_command_tables(argc, argv, tableopt, tblcmdopt, rulesopt, anchorname, opts); rulesopt = NULL; } + if (optiopt != NULL) { + switch (*optiopt) { + case 'n': + optimize = 0; + break; + case 'b': + optimize |= PF_OPTIMIZE_BASIC; + break; + case 'o': + case 'p': + optimize |= PF_OPTIMIZE_PROFILE; + break; + } + } - if ((rulesopt != NULL) && (!*anchorname)) + if (rulesopt != NULL) { + if (strcmp(rulesopt, "-") == 0) { + fin = stdin; + rulesopt = "stdin"; + } else { + if ((fin = pfctl_fopen(rulesopt, "r")) == NULL) + err(1, "%s", rulesopt); + } + } + if ((rulesopt != NULL) && (loadopt & PFCTL_FLAG_OPTION) && + !anchorname[0]) if (pfctl_clear_interface_flags(dev, opts | PF_OPT_QUIET)) error = 1; @@ -1857,7 +2325,11 @@ main(int argc, char *argv[]) error = 1; if (rulesopt != NULL) { - if (pfctl_rules(dev, rulesopt, opts, anchorname, NULL)) + if (anchorname[0] == '_' || strstr(anchorname, "/_") != NULL) + errx(1, "anchor names beginning with '_' cannot " + "be modified from the command line"); + if (pfctl_rules(dev, rulesopt, fin, opts, optimize, + anchorname, NULL)) error = 1; else if (!(opts & PF_OPT_NOACTION) && (loadopt & PFCTL_FLAG_TABLE)) @@ -1885,9 +2357,5 @@ main(int argc, char *argv[]) } } - if (opts & PF_OPT_CLRRULECTRS) { - if (pfctl_clear_rule_counters(dev, opts)) - error = 1; - } exit(error); } diff --git a/contrib/pf/pfctl/pfctl.h b/contrib/pf/pfctl/pfctl.h index 3b943dc..9450a55 100644 --- a/contrib/pf/pfctl/pfctl.h +++ b/contrib/pf/pfctl/pfctl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl.h,v 1.37 2005/01/05 18:23:10 mcbride Exp $ */ +/* $OpenBSD: pfctl.h,v 1.40 2007/02/09 11:25:27 henning Exp $ */ /* * Copyright (c) 
2001 Daniel Hartmeier @@ -33,6 +33,8 @@ #ifndef _PFCTL_H_ #define _PFCTL_H_ +enum pfctl_show { PFCTL_SHOW_RULES, PFCTL_SHOW_LABELS, PFCTL_SHOW_NOTHING }; + enum { PFRB_TABLES = 1, PFRB_TSTATS, PFRB_ADDRS, PFRB_ASTATS, PFRB_IFACES, PFRB_TRANS, PFRB_MAX }; struct pfr_buffer { @@ -73,7 +75,7 @@ int pfr_buf_grow(struct pfr_buffer *, int); int pfr_buf_load(struct pfr_buffer *, char *, int, int (*)(struct pfr_buffer *, char *, int)); char *pfr_strerror(int); -int pfi_get_ifaces(const char *, struct pfi_if *, int *, int); +int pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_clr_istats(const char *, int *, int); void pfctl_print_title(char *); @@ -106,7 +108,6 @@ extern int loadopt; int check_commit_altq(int, int); void pfaltq_store(struct pf_altq *); -void pfaltq_free(struct pf_altq *); struct pf_altq *pfaltq_lookup(const char *); char *rate2str(double); diff --git a/contrib/pf/pfctl/pfctl_altq.c b/contrib/pf/pfctl/pfctl_altq.c index 3f5b087..b4faaa0 100644 --- a/contrib/pf/pfctl/pfctl_altq.c +++ b/contrib/pf/pfctl/pfctl_altq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl_altq.c,v 1.86 2005/02/28 14:04:51 henning Exp $ */ +/* $OpenBSD: pfctl_altq.c,v 1.91 2006/11/28 00:08:50 henning Exp $ */ /* * Copyright (c) 2002 @@ -93,21 +93,6 @@ pfaltq_store(struct pf_altq *a) TAILQ_INSERT_TAIL(&altqs, altq, entries); } -void -pfaltq_free(struct pf_altq *a) -{ - struct pf_altq *altq; - - TAILQ_FOREACH(altq, &altqs, entries) { - if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0 && - strncmp(a->qname, altq->qname, PF_QNAME_SIZE) == 0) { - TAILQ_REMOVE(&altqs, altq, entries); - free(altq); - return; - } - } -} - struct pf_altq * pfaltq_lookup(const char *ifname) { @@ -157,7 +142,7 @@ print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw, struct node_queue_opt *qopts) { if (a->qname[0] != 0) { - print_queue(a, level, bw, 0, qopts); + print_queue(a, level, bw, 1, qopts); return; } @@ -238,8 +223,8 @@ eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct 
node_queue_bw *bw, pa->ifbandwidth = bw->bw_absolute; else if ((rate = getifspeed(pa->ifname)) == 0) { - fprintf(stderr, "cannot determine interface bandwidth " - "for %s, specify an absolute bandwidth\n", + fprintf(stderr, "interface %s does not know its bandwidth, " + "please specify an absolute bandwidth\n", pa->ifname); errors++; } else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0) @@ -490,10 +475,7 @@ cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa) maxidle = ptime * maxidle; else maxidle = ptime * maxidle_s; - if (minburst) - offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom); - else - offtime = cptime; + offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom); minidle = -((double)opts->maxpktsize * (double)nsPerByte); /* scale parameters */ @@ -698,8 +680,8 @@ eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa) } if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) || - (opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) || - (opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0)) { + (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) || + (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) { warnx("m1 must be zero for convex curve: %s", pa->qname); return (-1); } diff --git a/contrib/pf/pfctl/pfctl_optimize.c b/contrib/pf/pfctl/pfctl_optimize.c index 6c6803e..37d9320 100644 --- a/contrib/pf/pfctl/pfctl_optimize.c +++ b/contrib/pf/pfctl/pfctl_optimize.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl_optimize.c,v 1.5 2005/01/03 15:18:10 frantzen Exp $ */ +/* $OpenBSD: pfctl_optimize.c,v 1.13 2006/10/31 14:17:45 mcbride Exp $ */ /* * Copyright (c) 2004 Mike Frantzen <frantzen@openbsd.org> @@ -109,6 +109,10 @@ struct pf_rule_field { PF_RULE_FIELD(prob, BARRIER), PF_RULE_FIELD(max_states, BARRIER), PF_RULE_FIELD(max_src_nodes, BARRIER), + PF_RULE_FIELD(max_src_states, BARRIER), + PF_RULE_FIELD(max_src_conn, BARRIER), + PF_RULE_FIELD(max_src_conn_rate, BARRIER), + PF_RULE_FIELD(anchor, BARRIER), /* for now */ /* * These 
fields must be the same between all rules in the same superblock. @@ -120,10 +124,18 @@ struct pf_rule_field { PF_RULE_FIELD(tagname, BREAK), PF_RULE_FIELD(keep_state, BREAK), PF_RULE_FIELD(qname, BREAK), + PF_RULE_FIELD(pqname, BREAK), PF_RULE_FIELD(rt, BREAK), PF_RULE_FIELD(allow_opts, BREAK), PF_RULE_FIELD(rule_flag, BREAK), PF_RULE_FIELD(action, BREAK), + PF_RULE_FIELD(log, BREAK), + PF_RULE_FIELD(quick, BREAK), + PF_RULE_FIELD(return_ttl, BREAK), + PF_RULE_FIELD(overload_tblname, BREAK), + PF_RULE_FIELD(flush, BREAK), + PF_RULE_FIELD(rpool, BREAK), + PF_RULE_FIELD(logif, BREAK), /* * Any fields not listed in this structure act as BREAK fields @@ -137,7 +149,7 @@ struct pf_rule_field { */ PF_RULE_FIELD(af, NOMERGE), PF_RULE_FIELD(ifnot, NOMERGE), - PF_RULE_FIELD(ifname, NOMERGE), + PF_RULE_FIELD(ifname, NOMERGE), /* hack for IF groups */ PF_RULE_FIELD(match_tag_not, NOMERGE), PF_RULE_FIELD(match_tagname, NOMERGE), PF_RULE_FIELD(os_fingerprint, NOMERGE), @@ -170,7 +182,6 @@ struct pf_rule_field { PF_RULE_FIELD(packets, DC), PF_RULE_FIELD(bytes, DC), PF_RULE_FIELD(kif, DC), - PF_RULE_FIELD(anchor, DC), PF_RULE_FIELD(states, DC), PF_RULE_FIELD(src_nodes, DC), PF_RULE_FIELD(nr, DC), @@ -179,6 +190,9 @@ struct pf_rule_field { PF_RULE_FIELD(pqid, DC), PF_RULE_FIELD(anchor_relative, DC), PF_RULE_FIELD(anchor_wildcard, DC), + PF_RULE_FIELD(tag, DC), + PF_RULE_FIELD(match_tag, DC), + PF_RULE_FIELD(overload_tbl, DC), /* These fields should never be set in a PASS/BLOCK rule */ PF_RULE_FIELD(natpass, NEVER), @@ -198,6 +212,7 @@ void comparable_rule(struct pf_rule *, const struct pf_rule *, int); int construct_superblocks(struct pfctl *, struct pf_opt_queue *, struct superblocks *); void exclude_supersets(struct pf_rule *, struct pf_rule *); +int interface_group(const char *); int load_feedback_profile(struct pfctl *, struct superblocks *); int optimize_superblock(struct pfctl *, struct superblock *); int pf_opt_create_table(struct pfctl *, struct pf_opt_tbl *); @@ -240,25 
+255,52 @@ int table_identifier; int -pfctl_optimize_rules(struct pfctl *pf) +pfctl_optimize_ruleset(struct pfctl *pf, struct pf_ruleset *rs) { struct superblocks superblocks; + struct pf_opt_queue opt_queue; struct superblock *block; struct pf_opt_rule *por; - int nr; + struct pf_rule *r; + struct pf_rulequeue *old_rules; DEBUG("optimizing ruleset"); memset(&table_buffer, 0, sizeof(table_buffer)); skip_init(); + TAILQ_INIT(&opt_queue); - if (TAILQ_FIRST(&pf->opt_queue)) - nr = TAILQ_FIRST(&pf->opt_queue)->por_rule.nr; + old_rules = rs->rules[PF_RULESET_FILTER].active.ptr; + rs->rules[PF_RULESET_FILTER].active.ptr = + rs->rules[PF_RULESET_FILTER].inactive.ptr; + rs->rules[PF_RULESET_FILTER].inactive.ptr = old_rules; + + /* + * XXX expanding the pf_opt_rule format throughout pfctl might allow + * us to avoid all this copying. + */ + while ((r = TAILQ_FIRST(rs->rules[PF_RULESET_FILTER].inactive.ptr)) + != NULL) { + TAILQ_REMOVE(rs->rules[PF_RULESET_FILTER].inactive.ptr, r, + entries); + if ((por = calloc(1, sizeof(*por))) == NULL) + err(1, "calloc"); + memcpy(&por->por_rule, r, sizeof(*r)); + if (TAILQ_FIRST(&r->rpool.list) != NULL) { + TAILQ_INIT(&por->por_rule.rpool.list); + pfctl_move_pool(&r->rpool, &por->por_rule.rpool); + } else + bzero(&por->por_rule.rpool, + sizeof(por->por_rule.rpool)); + + + TAILQ_INSERT_TAIL(&opt_queue, por, por_entry); + } TAILQ_INIT(&superblocks); - if (construct_superblocks(pf, &pf->opt_queue, &superblocks)) + if (construct_superblocks(pf, &opt_queue, &superblocks)) goto error; - if (pf->opts & PF_OPT_OPTIMIZE_PROFILE) { + if (pf->optimize & PF_OPTIMIZE_PROFILE) { if (load_feedback_profile(pf, &superblocks)) goto error; } @@ -268,24 +310,21 @@ pfctl_optimize_rules(struct pfctl *pf) goto error; } - - /* - * Optimizations are done so we turn off the optimization flag and - * put the rules right back into the regular codepath. 
- */ - pf->opts &= ~PF_OPT_OPTIMIZE; - + rs->anchor->refcnt = 0; while ((block = TAILQ_FIRST(&superblocks))) { TAILQ_REMOVE(&superblocks, block, sb_entry); while ((por = TAILQ_FIRST(&block->sb_rules))) { TAILQ_REMOVE(&block->sb_rules, por, por_entry); - por->por_rule.nr = nr++; - if (pfctl_add_rule(pf, &por->por_rule, - por->por_anchor)) { - free(por); - goto error; - } + por->por_rule.nr = rs->anchor->refcnt++; + if ((r = calloc(1, sizeof(*r))) == NULL) + err(1, "calloc"); + memcpy(r, &por->por_rule, sizeof(*r)); + TAILQ_INIT(&r->rpool.list); + pfctl_move_pool(&por->por_rule.rpool, &r->rpool); + TAILQ_INSERT_TAIL( + rs->rules[PF_RULESET_FILTER].active.ptr, + r, entries); free(por); } free(block); @@ -294,8 +333,8 @@ pfctl_optimize_rules(struct pfctl *pf) return (0); error: - while ((por = TAILQ_FIRST(&pf->opt_queue))) { - TAILQ_REMOVE(&pf->opt_queue, por, por_entry); + while ((por = TAILQ_FIRST(&opt_queue))) { + TAILQ_REMOVE(&opt_queue, por, por_entry); if (por->por_src_tbl) { pfr_buf_clear(por->por_src_tbl->pt_buf); free(por->por_src_tbl->pt_buf); @@ -364,7 +403,8 @@ optimize_superblock(struct pfctl *pf, struct superblock *block) printf("--- Superblock ---\n"); TAILQ_FOREACH(por, &block->sb_rules, por_entry) { printf(" "); - print_rule(&por->por_rule, por->por_anchor, 1); + print_rule(&por->por_rule, por->por_rule.anchor ? 
+ por->por_rule.anchor->name : "", 1); } #endif /* OPT_DEBUG */ @@ -373,7 +413,7 @@ optimize_superblock(struct pfctl *pf, struct superblock *block) return (1); if (combine_rules(pf, block)) return (1); - if ((pf->opts & PF_OPT_OPTIMIZE_PROFILE) && + if ((pf->optimize & PF_OPTIMIZE_PROFILE) && TAILQ_FIRST(&block->sb_rules)->por_rule.quick && block->sb_profiled_block) { if (block_feedback(pf, block)) @@ -780,14 +820,16 @@ block_feedback(struct pfctl *pf, struct superblock *block) */ TAILQ_FOREACH(por1, &block->sb_profiled_block->sb_rules, por_entry) { comparable_rule(&a, &por1->por_rule, DC); - total_count += por1->por_rule.packets; + total_count += por1->por_rule.packets[0] + + por1->por_rule.packets[1]; TAILQ_FOREACH(por2, &block->sb_rules, por_entry) { if (por2->por_profile_count) continue; comparable_rule(&b, &por2->por_rule, DC); if (memcmp(&a, &b, sizeof(a)) == 0) { por2->por_profile_count = - por1->por_rule.packets; + por1->por_rule.packets[0] + + por1->por_rule.packets[1]; break; } } @@ -851,6 +893,7 @@ load_feedback_profile(struct pfctl *pf, struct superblocks *superblocks) DEBUG("Loading %d active rules for a feedback profile", mnr); for (nr = 0; nr < mnr; ++nr) { + struct pf_ruleset *rs; if ((por = calloc(1, sizeof(*por))) == NULL) { warn("calloc"); return (1); @@ -861,8 +904,8 @@ load_feedback_profile(struct pfctl *pf, struct superblocks *superblocks) return (1); } memcpy(&por->por_rule, &pr.rule, sizeof(por->por_rule)); - strlcpy(por->por_anchor, pr.anchor_call, - sizeof(por->por_anchor)); + rs = pf_find_or_create_ruleset(pr.anchor_call); + por->por_rule.anchor = rs->anchor; if (TAILQ_EMPTY(&por->por_rule.rpool.list)) memset(&por->por_rule.rpool, 0, sizeof(por->por_rule.rpool)); @@ -1045,6 +1088,7 @@ skip_cmp_dst_addr(struct pf_rule *a, struct pf_rule *b) return (1); return (0); case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (strcmp(a->dst.addr.v.tblname, b->dst.addr.v.tblname)); @@ -1116,6 +1160,7 @@ 
skip_cmp_src_addr(struct pf_rule *a, struct pf_rule *b) return (1); return (0); case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: return (0); case PF_ADDR_TABLE: return (strcmp(a->src.addr.v.tblname, b->src.addr.v.tblname)); @@ -1267,8 +1312,8 @@ again: tablenum++; - if (pfctl_define_table(tbl->pt_name, PFR_TFLAG_CONST, 1, pf->anchor, - tbl->pt_buf, pf->tticket)) { + if (pfctl_define_table(tbl->pt_name, PFR_TFLAG_CONST, 1, + pf->anchor->name, tbl->pt_buf, pf->anchor->ruleset.tticket)) { warn("failed to create table %s", tbl->pt_name); return (1); } @@ -1367,15 +1412,34 @@ superblock_inclusive(struct superblock *block, struct pf_opt_rule *por) } } - /* 'anchor' heads and per-rule src-track are also hard breaks */ - if (por->por_anchor[0] != '\0' || - (por->por_rule.rule_flag & PFRULE_RULESRCTRACK)) + /* per-rule src-track is also a hard break */ + if (por->por_rule.rule_flag & PFRULE_RULESRCTRACK) return (0); + /* + * Have to handle interface groups seperately. Consider the following + * rules: + * block on EXTIFS to any port 22 + * pass on em0 to any port 22 + * (where EXTIFS is an arbitrary interface group) + * The optimizer may decide to re-order the pass rule in front of the + * block rule. But what if EXTIFS includes em0??? Such a reordering + * would change the meaning of the ruleset. + * We can't just lookup the EXTIFS group and check if em0 is a member + * because the user is allowed to add interfaces to a group during + * runtime. 
+ * Ergo interface groups become a defacto superblock break :-( + */ + if (interface_group(por->por_rule.ifname) || + interface_group(TAILQ_FIRST(&block->sb_rules)->por_rule.ifname)) { + if (strcasecmp(por->por_rule.ifname, + TAILQ_FIRST(&block->sb_rules)->por_rule.ifname) != 0) + return (0); + } + comparable_rule(&a, &TAILQ_FIRST(&block->sb_rules)->por_rule, NOMERGE); comparable_rule(&b, &por->por_rule, NOMERGE); - if (strcmp(TAILQ_FIRST(&block->sb_rules)->por_anchor, - por->por_anchor) == 0 && memcmp(&a, &b, sizeof(a)) == 0) + if (memcmp(&a, &b, sizeof(a)) == 0) return (1); #ifdef OPT_DEBUG @@ -1419,6 +1483,24 @@ superblock_inclusive(struct superblock *block, struct pf_opt_rule *por) /* + * Figure out if an interface name is an actual interface or actually a + * group of interfaces. + */ +int +interface_group(const char *ifname) +{ + if (ifname == NULL || !ifname[0]) + return (0); + + /* Real interfaces must end in a number, interface groups do not */ + if (isdigit(ifname[strlen(ifname) - 1])) + return (0); + else + return (1); +} + + +/* * Make a rule that can directly compared by memcmp() */ void diff --git a/contrib/pf/pfctl/pfctl_osfp.c b/contrib/pf/pfctl/pfctl_osfp.c index 23e3ccc..7018d6c 100644 --- a/contrib/pf/pfctl/pfctl_osfp.c +++ b/contrib/pf/pfctl/pfctl_osfp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl_osfp.c,v 1.12 2005/02/17 13:18:00 aaron Exp $ */ +/* $OpenBSD: pfctl_osfp.c,v 1.15 2006/12/13 05:10:15 itojun Exp $ */ /* * Copyright (c) 2003 Mike Frantzen <frantzen@openbsd.org> @@ -23,6 +23,10 @@ #include <net/if.h> #include <net/pfvar.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> + #include <ctype.h> #include <err.h> #include <errno.h> @@ -240,6 +244,10 @@ pfctl_file_fingerprints(int dev, int opts, const char *fp_filename) sizeof(fp.fp_os.fp_subtype_nm)); add_fingerprint(dev, opts, &fp); + + fp.fp_flags |= (PF_OSFP_DF | PF_OSFP_INET6); + fp.fp_psize += sizeof(struct ip6_hdr) - sizeof(struct ip); + add_fingerprint(dev, 
opts, &fp); } if (class) @@ -250,6 +258,8 @@ pfctl_file_fingerprints(int dev, int opts, const char *fp_filename) free(subtype); if (desc) free(desc); + if (tcpopts) + free(tcpopts); fclose(in); @@ -762,7 +772,6 @@ sort_name_list(int opts, struct name_list *nml) LIST_INSERT_AFTER(nmlast, nm, nm_entry); nmlast = nm; } - return; } /* parse the next integer in a formatted config file line */ diff --git a/contrib/pf/pfctl/pfctl_parser.c b/contrib/pf/pfctl/pfctl_parser.c index 4d5ca0d..e7b3b85 100644 --- a/contrib/pf/pfctl/pfctl_parser.c +++ b/contrib/pf/pfctl/pfctl_parser.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl_parser.c,v 1.211 2004/12/07 10:33:41 dhartmei Exp $ */ +/* $OpenBSD: pfctl_parser.c,v 1.234 2006/10/31 23:46:24 mcbride Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -54,6 +54,7 @@ #include <errno.h> #include <err.h> #include <ifaddrs.h> +#include <unistd.h> #include "pfctl_parser.h" #include "pfctl.h" @@ -66,6 +67,7 @@ void print_fromto(struct pf_rule_addr *, pf_osfp_t, struct pf_rule_addr *, u_int8_t, u_int8_t, int); int ifa_skip_if(const char *filter, struct node_host *p); +struct node_host *ifa_grouplookup(const char *, int); struct node_host *host_if(const char *, int); struct node_host *host_v4(const char *, int); struct node_host *host_v6(const char *, int); @@ -479,9 +481,11 @@ const char *pf_scounters[FCNT_MAX+1] = FCNT_NAMES; void print_status(struct pf_status *s, int opts) { - char statline[80], *running; - time_t runtime; - int i; + char statline[80], *running; + time_t runtime; + int i; + char buf[PF_MD5_DIGEST_LENGTH * 2 + 1]; + static const char hex[] = "0123456789abcdef"; runtime = time(NULL) - s->since; running = s->running ? 
"Enabled" : "Disabled"; @@ -515,7 +519,18 @@ print_status(struct pf_status *s, int opts) printf("%15s\n\n", "Debug: Loud"); break; } - printf("Hostid: 0x%08x\n\n", ntohl(s->hostid)); + + if (opts & PF_OPT_VERBOSE) { + printf("Hostid: 0x%08x\n", ntohl(s->hostid)); + + for (i = 0; i < PF_MD5_DIGEST_LENGTH; i++) { + buf[i + i] = hex[s->pf_chksum[i] >> 4]; + buf[i + i + 1] = hex[s->pf_chksum[i] & 0x0f]; + } + buf[i + i] = '\0'; + printf("Checksum: 0x%s\n\n", buf); + } + if (s->ifname[0] != 0) { printf("Interface Stats for %-16s %5s %16s\n", s->ifname, "IPv4", "IPv6"); @@ -623,7 +638,9 @@ print_src_node(struct pf_src_node *sn, int opts) printf(", expires in %.2u:%.2u:%.2u", sn->expire, min, sec); } - printf(", %u pkts, %u bytes", sn->packets, sn->bytes); + printf(", %llu pkts, %llu bytes", + sn->packets[0] + sn->packets[1], + sn->bytes[0] + sn->bytes[1]); switch (sn->ruletype) { case PF_NAT: if (sn->rule.nr != -1) @@ -656,10 +673,13 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) printf("@%d ", r->nr); if (r->action > PF_NORDR) printf("action(%d)", r->action); - else if (anchor_call[0]) - printf("%s \"%s\"", anchortypes[r->action], - anchor_call); - else { + else if (anchor_call[0]) { + if (anchor_call[0] == '_') { + printf("%s", anchortypes[r->action]); + } else + printf("%s \"%s\"", anchortypes[r->action], + anchor_call); + } else { printf("%s", actiontypes[r->action]); if (r->natpass) printf(" pass"); @@ -714,10 +734,22 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) printf(" in"); else if (r->direction == PF_OUT) printf(" out"); - if (r->log == 1) + if (r->log) { printf(" log"); - else if (r->log == 2) - printf(" log-all"); + if (r->log & ~PF_LOG || r->logif) { + int count = 0; + + printf(" ("); + if (r->log & PF_LOG_ALL) + printf("%sall", count++ ? ", " : ""); + if (r->log & PF_LOG_SOCKET_LOOKUP) + printf("%suser", count++ ? ", " : ""); + if (r->logif) + printf("%sto pflog%u", count++ ? 
", " : "", + r->logif); + printf(")"); + } + } if (r->quick) printf(" quick"); if (r->ifname[0]) { @@ -767,7 +799,11 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) print_flags(r->flags); printf("/"); print_flags(r->flagset); - } + } else if (r->action == PF_PASS && + (!r->proto || r->proto == IPPROTO_TCP) && + !(r->rule_flag & PFRULE_FRAGMENT) && + !anchor_call[0] && r->keep_state) + printf(" flags any"); if (r->type) { const struct icmptypeent *it; @@ -792,7 +828,9 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) } if (r->tos) printf(" tos 0x%2.2x", r->tos); - if (r->keep_state == PF_STATE_NORMAL) + if (!r->keep_state && r->action == PF_PASS && !anchor_call[0]) + printf(" no state"); + else if (r->keep_state == PF_STATE_NORMAL) printf(" keep state"); else if (r->keep_state == PF_STATE_MODULATE) printf(" modulate state"); @@ -820,7 +858,7 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) opts = 1; if (r->rule_flag & PFRULE_SRCTRACK) opts = 1; - if (r->rule_flag & (PFRULE_IFBOUND | PFRULE_GRBOUND)) + if (r->rule_flag & PFRULE_IFBOUND) opts = 1; for (i = 0; !opts && i < PFTM_MAX; ++i) if (r->timeout[i]) @@ -888,12 +926,6 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) printf("if-bound"); opts = 0; } - if (r->rule_flag & PFRULE_GRBOUND) { - if (!opts) - printf(", "); - printf("group-bound"); - opts = 0; - } for (i = 0; i < PFTM_MAX; ++i) if (r->timeout[i]) { int j; @@ -901,12 +933,13 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) if (!opts) printf(", "); opts = 0; - for (j = 0; j < sizeof(pf_timeouts) / - sizeof(pf_timeouts[0]); ++j) + for (j = 0; pf_timeouts[j].name != NULL; + ++j) if (pf_timeouts[j].timeout == i) break; - printf("%s %u", j == PFTM_MAX ? "inv.timeout" : - pf_timeouts[j].name, r->timeout[i]); + printf("%s %u", pf_timeouts[j].name == NULL ? 
+ "inv.timeout" : pf_timeouts[j].name, + r->timeout[i]); } printf(")"); } @@ -946,13 +979,14 @@ print_rule(struct pf_rule *r, const char *anchor_call, int verbose) printf(" !"); printf(" tagged %s", r->match_tagname); } + if (r->rtableid != -1) + printf(" rtable %u", r->rtableid); if (!anchor_call[0] && (r->action == PF_NAT || r->action == PF_BINAT || r->action == PF_RDR)) { printf(" -> "); print_pool(&r->rpool, r->rpool.proxy_port[0], r->rpool.proxy_port[1], r->af, r->action); } - printf("\n"); } void @@ -1145,13 +1179,31 @@ ifa_load(void) } struct node_host * -ifa_exists(const char *ifa_name, int group_ok) +ifa_exists(const char *ifa_name) { struct node_host *n; + struct ifgroupreq ifgr; + int s; if (iftab == NULL) ifa_load(); + /* check wether this is a group */ + if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) + err(1, "socket"); + bzero(&ifgr, sizeof(ifgr)); + strlcpy(ifgr.ifgr_name, ifa_name, sizeof(ifgr.ifgr_name)); + if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == 0) { + /* fake a node_host */ + if ((n = calloc(1, sizeof(*n))) == NULL) + err(1, "calloc"); + if ((n->ifname = strdup(ifa_name)) == NULL) + err(1, "strdup"); + close(s); + return (n); + } + close(s); + for (n = iftab; n; n = n->next) { if (n->af == AF_LINK && !strncmp(n->ifname, ifa_name, IFNAMSIZ)) return (n); @@ -1161,12 +1213,56 @@ ifa_exists(const char *ifa_name, int group_ok) } struct node_host * +ifa_grouplookup(const char *ifa_name, int flags) +{ + struct ifg_req *ifg; + struct ifgroupreq ifgr; + int s, len; + struct node_host *n, *h = NULL; + + if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1) + err(1, "socket"); + bzero(&ifgr, sizeof(ifgr)); + strlcpy(ifgr.ifgr_name, ifa_name, sizeof(ifgr.ifgr_name)); + if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1) { + close(s); + return (NULL); + } + + len = ifgr.ifgr_len; + if ((ifgr.ifgr_groups = calloc(1, len)) == NULL) + err(1, "calloc"); + if (ioctl(s, SIOCGIFGMEMB, (caddr_t)&ifgr) == -1) + err(1, "SIOCGIFGMEMB"); + + for (ifg = ifgr.ifgr_groups; 
ifg && len >= sizeof(struct ifg_req); + ifg++) { + len -= sizeof(struct ifg_req); + if ((n = ifa_lookup(ifg->ifgrq_member, flags)) == NULL) + continue; + if (h == NULL) + h = n; + else { + h->tail->next = n; + h->tail = n->tail; + } + } + free(ifgr.ifgr_groups); + close(s); + + return (h); +} + +struct node_host * ifa_lookup(const char *ifa_name, int flags) { struct node_host *p = NULL, *h = NULL, *n = NULL; int got4 = 0, got6 = 0; const char *last_if = NULL; + if ((h = ifa_grouplookup(ifa_name, flags)) != NULL) + return (h); + if (!strncmp(ifa_name, "self", IFNAMSIZ)) ifa_name = NULL; @@ -1344,7 +1440,7 @@ host_if(const char *s, int mask) free(ps); return (NULL); } - if (ifa_exists(ps, 1) || !strncmp(ps, "self", IFNAMSIZ)) { + if (ifa_exists(ps) || !strncmp(ps, "self", IFNAMSIZ)) { /* interface with this name exists */ h = ifa_lookup(ps, flags); for (n = h; n != NULL && mask > -1; n = n->next) diff --git a/contrib/pf/pfctl/pfctl_parser.h b/contrib/pf/pfctl/pfctl_parser.h index 5f32942..b901fb9 100644 --- a/contrib/pf/pfctl/pfctl_parser.h +++ b/contrib/pf/pfctl/pfctl_parser.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl_parser.h,v 1.80 2005/02/07 18:18:14 david Exp $ */ +/* $OpenBSD: pfctl_parser.h,v 1.86 2006/10/31 23:46:25 mcbride Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -47,14 +47,17 @@ #define PF_OPT_DEBUG 0x0200 #define PF_OPT_SHOWALL 0x0400 #define PF_OPT_OPTIMIZE 0x0800 -#define PF_OPT_OPTIMIZE_PROFILE 0x1000 #define PF_OPT_MERGE 0x2000 +#define PF_OPT_RECURSE 0x4000 #define PF_TH_ALL 0xFF #define PF_NAT_PROXY_PORT_LOW 50001 #define PF_NAT_PROXY_PORT_HIGH 65535 +#define PF_OPTIMIZE_BASIC 0x0001 +#define PF_OPTIMIZE_PROFILE 0x0002 + #define FCNT_NAMES { \ "searches", \ "inserts", \ @@ -63,24 +66,25 @@ } struct pfr_buffer; /* forward definition */ -struct pf_opt_rule; -TAILQ_HEAD(pf_opt_queue, pf_opt_rule); struct pfctl { int dev; int opts; + int optimize; int loadopt; - u_int32_t tticket; /* table ticket */ + int asd; /* anchor stack depth */ + int bn; /* 
brace number */ + int brace; int tdirty; /* kernel dirty */ - u_int32_t rule_nr; +#define PFCTL_ANCHOR_STACK_DEPTH 64 + struct pf_anchor *astack[PFCTL_ANCHOR_STACK_DEPTH]; struct pfioc_pooladdr paddr; struct pfioc_altq *paltq; struct pfioc_queue *pqueue; struct pfr_buffer *trans; - const char *anchor; + struct pf_anchor *anchor, *alast; const char *ruleset; - struct pf_opt_queue opt_queue; /* 'set foo' options */ u_int32_t timeout[PFTM_MAX]; @@ -117,10 +121,6 @@ struct node_host { struct node_host *next; struct node_host *tail; }; -/* special flags used by ifa_exists */ -#define PF_IFA_FLAG_GROUP 0x10000 -#define PF_IFA_FLAG_DYNAMIC 0x20000 -#define PF_IFA_FLAG_CLONABLE 0x40000 struct node_os { char *os; @@ -180,19 +180,20 @@ struct pf_opt_rule { struct pf_rule por_rule; struct pf_opt_tbl *por_src_tbl; struct pf_opt_tbl *por_dst_tbl; - char por_anchor[MAXPATHLEN]; u_int64_t por_profile_count; TAILQ_ENTRY(pf_opt_rule) por_entry; TAILQ_ENTRY(pf_opt_rule) por_skip_entry[PF_SKIP_COUNT]; }; +TAILQ_HEAD(pf_opt_queue, pf_opt_rule); -int pfctl_rules(int, char *, int, char *, struct pfr_buffer *); -int pfctl_optimize_rules(struct pfctl *); +int pfctl_rules(int, char *, FILE *, int, int, char *, struct pfr_buffer *); +int pfctl_optimize_ruleset(struct pfctl *, struct pf_ruleset *); int pfctl_add_rule(struct pfctl *, struct pf_rule *, const char *); int pfctl_add_altq(struct pfctl *, struct pf_altq *); int pfctl_add_pool(struct pfctl *, struct pf_pool *, sa_family_t); +void pfctl_move_pool(struct pf_pool *, struct pf_pool *); void pfctl_clear_pool(struct pf_pool *); int pfctl_set_timeout(struct pfctl *, const char *, int, int); @@ -205,7 +206,7 @@ int pfctl_set_interface_flags(struct pfctl *, char *, int, int); int parse_rules(FILE *, struct pfctl *); int parse_flags(char *); -int pfctl_load_anchors(int, int, struct pfr_buffer *); +int pfctl_load_anchors(int, struct pfctl *, struct pfr_buffer *); void print_pool(struct pf_pool *, u_int16_t, u_int16_t, sa_family_t, int); void 
print_src_node(struct pf_src_node *, int); @@ -267,7 +268,7 @@ void set_ipmask(struct node_host *, u_int8_t); int check_netmask(struct node_host *, sa_family_t); int unmask(struct pf_addr *, sa_family_t); void ifa_load(void); -struct node_host *ifa_exists(const char *, int); +struct node_host *ifa_exists(const char *); struct node_host *ifa_lookup(const char *, int); struct node_host *host(const char *); diff --git a/contrib/pf/pfctl/pfctl_radix.c b/contrib/pf/pfctl/pfctl_radix.c index ba004b8..01ad475 100644 --- a/contrib/pf/pfctl/pfctl_radix.c +++ b/contrib/pf/pfctl/pfctl_radix.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl_radix.c,v 1.26 2004/06/14 20:44:22 cedric Exp $ */ +/* $OpenBSD: pfctl_radix.c,v 1.27 2005/05/21 21:03:58 henning Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -421,7 +421,7 @@ pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, /* interface management code */ int -pfi_get_ifaces(const char *filter, struct pfi_if *buf, int *size, int flags) +pfi_get_ifaces(const char *filter, struct pfi_kif *buf, int *size) { struct pfioc_iface io; @@ -430,7 +430,6 @@ pfi_get_ifaces(const char *filter, struct pfi_if *buf, int *size, int flags) return (-1); } bzero(&io, sizeof io); - io.pfiio_flags = flags; if (filter != NULL) if (strlcpy(io.pfiio_name, filter, sizeof(io.pfiio_name)) >= sizeof(io.pfiio_name)) { @@ -451,7 +450,7 @@ pfi_get_ifaces(const char *filter, struct pfi_if *buf, int *size, int flags) size_t buf_esize[PFRB_MAX] = { 0, sizeof(struct pfr_table), sizeof(struct pfr_tstats), sizeof(struct pfr_addr), sizeof(struct pfr_astats), - sizeof(struct pfi_if), sizeof(struct pfioc_trans_e) + sizeof(struct pfi_kif), sizeof(struct pfioc_trans_e) }; /* diff --git a/contrib/pf/pfctl/pfctl_table.c b/contrib/pf/pfctl/pfctl_table.c index 9c7ba5b..bee5786 100644 --- a/contrib/pf/pfctl/pfctl_table.c +++ b/contrib/pf/pfctl/pfctl_table.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pfctl_table.c,v 1.62 2004/12/22 17:17:55 dhartmei Exp $ */ +/* $OpenBSD: 
pfctl_table.c,v 1.66 2007/03/01 17:20:54 deraadt Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -61,8 +61,7 @@ static void print_addrx(struct pfr_addr *, struct pfr_addr *, int); static void print_astats(struct pfr_astats *, int); static void radix_perror(void); static void xprintf(int, const char *, ...); -static void print_iface(struct pfi_if *, int); -static void oprintf(int, int, const char *, int *, int); +static void print_iface(struct pfi_kif *, int); static const char *stats_text[PFR_DIR_MAX][PFR_OP_TABLE_MAX] = { { "In/Block:", "In/Pass:", "In/XPass:" }, @@ -175,7 +174,7 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, break; } - if (opts & PF_OPT_SHOWALL && b.pfrb_size > 0) + if ((opts & PF_OPT_SHOWALL) && b.pfrb_size > 0) pfctl_print_title("TABLES:"); PFRB_FOREACH(p, &b) @@ -254,6 +253,42 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) print_addrx(a, NULL, opts & PF_OPT_USEDNS); + } else if (!strcmp(command, "expire")) { + const char *errstr; + u_int lifetime; + + b.pfrb_type = PFRB_ASTATS; + b2.pfrb_type = PFRB_ADDRS; + if (argc != 1 || file != NULL) + usage(); + lifetime = strtonum(*argv, 0, UINT_MAX, &errstr); + if (errstr) + errx(1, "expiry time: %s", errstr); + for (;;) { + pfr_buf_grow(&b, b.pfrb_size); + b.pfrb_size = b.pfrb_msize; + RVTEST(pfr_get_astats(&table, b.pfrb_caddr, + &b.pfrb_size, flags)); + if (b.pfrb_size <= b.pfrb_msize) + break; + } + PFRB_FOREACH(p, &b) + if (time(NULL) - ((struct pfr_astats *)p)->pfras_tzero > + lifetime) + if (pfr_buf_add(&b2, + &((struct pfr_astats *)p)->pfras_a)) + err(1, "duplicate buffer"); + + if (opts & PF_OPT_VERBOSE) + flags |= PFR_FLAG_FEEDBACK; + RVTEST(pfr_del_addrs(&table, b2.pfrb_caddr, b2.pfrb_size, + &ndel, flags)); + xprintf(opts, "%d/%d addresses expired", ndel, b2.pfrb_size); + if (opts & PF_OPT_VERBOSE) + PFRB_FOREACH(a, &b2) + if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + print_addrx(a, NULL, 
+ opts & PF_OPT_USEDNS); } else if (!strcmp(command, "show")) { b.pfrb_type = (opts & PF_OPT_VERBOSE) ? PFRB_ASTATS : PFRB_ADDRS; @@ -291,7 +326,7 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, RVTEST(pfr_tst_addrs(&table, b.pfrb_caddr, b.pfrb_size, &nmatch, flags)); xprintf(opts, "%d/%d addresses match", nmatch, b.pfrb_size); - if (opts & PF_OPT_VERBOSE && !(opts & PF_OPT_VERBOSE2)) + if ((opts & PF_OPT_VERBOSE) && !(opts & PF_OPT_VERBOSE2)) PFRB_FOREACH(a, &b) if (a->pfra_fback == PFR_FB_MATCH) print_addrx(a, NULL, @@ -539,17 +574,15 @@ int pfctl_show_ifaces(const char *filter, int opts) { struct pfr_buffer b; - struct pfi_if *p; - int i = 0, f = PFI_FLAG_GROUP|PFI_FLAG_INSTANCE; + struct pfi_kif *p; + int i = 0; - if (filter != NULL && *filter && !isdigit(filter[strlen(filter)-1])) - f &= ~PFI_FLAG_INSTANCE; bzero(&b, sizeof(b)); b.pfrb_type = PFRB_IFACES; for (;;) { pfr_buf_grow(&b, b.pfrb_size); b.pfrb_size = b.pfrb_msize; - if (pfi_get_ifaces(filter, b.pfrb_caddr, &b.pfrb_size, f)) { + if (pfi_get_ifaces(filter, b.pfrb_caddr, &b.pfrb_size)) { radix_perror(); return (1); } @@ -565,46 +598,30 @@ pfctl_show_ifaces(const char *filter, int opts) } void -print_iface(struct pfi_if *p, int opts) +print_iface(struct pfi_kif *p, int opts) { - time_t tzero = p->pfif_tzero; - int flags = (opts & PF_OPT_VERBOSE) ? 
p->pfif_flags : 0; - int first = 1; + time_t tzero = p->pfik_tzero; int i, af, dir, act; - printf("%s", p->pfif_name); - oprintf(flags, PFI_IFLAG_INSTANCE, "instance", &first, 0); - oprintf(flags, PFI_IFLAG_GROUP, "group", &first, 0); - oprintf(flags, PFI_IFLAG_CLONABLE, "clonable", &first, 0); - oprintf(flags, PFI_IFLAG_DYNAMIC, "dynamic", &first, 0); - oprintf(flags, PFI_IFLAG_ATTACHED, "attached", &first, 0); - oprintf(flags, PFI_IFLAG_SKIP, "skipped", &first, 1); + printf("%s", p->pfik_name); + if (opts & PF_OPT_VERBOSE) { + if (p->pfik_flags & PFI_IFLAG_SKIP) + printf(" (skip)"); + } printf("\n"); if (!(opts & PF_OPT_VERBOSE2)) return; printf("\tCleared: %s", ctime(&tzero)); printf("\tReferences: [ States: %-18d Rules: %-18d ]\n", - p->pfif_states, p->pfif_rules); + p->pfik_states, p->pfik_rules); for (i = 0; i < 8; i++) { af = (i>>2) & 1; dir = (i>>1) &1; act = i & 1; printf("\t%-12s [ Packets: %-18llu Bytes: %-18llu ]\n", istats_text[af][dir][act], - (unsigned long long)p->pfif_packets[af][dir][act], - (unsigned long long)p->pfif_bytes[af][dir][act]); + (unsigned long long)p->pfik_packets[af][dir][act], + (unsigned long long)p->pfik_bytes[af][dir][act]); } } - -void -oprintf(int flags, int flag, const char *s, int *first, int last) -{ - if (flags & flag) { - printf(*first ? "\t(%s" : ", %s", s); - *first = 0; - } - if (last && !*first) - printf(")"); -} - diff --git a/contrib/pf/pflogd/pflogd.8 b/contrib/pf/pflogd/pflogd.8 index d13b772..cbb7802 100644 --- a/contrib/pf/pflogd/pflogd.8 +++ b/contrib/pf/pflogd/pflogd.8 @@ -1,4 +1,4 @@ -.\" $OpenBSD: pflogd.8,v 1.25 2005/01/02 18:15:02 jmc Exp $ +.\" $OpenBSD: pflogd.8,v 1.32 2006/12/08 10:26:38 joel Exp $ .\" .\" Copyright (c) 2001 Can Erkin Acar. All rights reserved. 
.\" @@ -35,14 +35,17 @@ .Op Fl Dx .Op Fl d Ar delay .Op Fl f Ar filename +.Op Fl i Ar interface .Op Fl s Ar snaplen .Op Ar expression .Sh DESCRIPTION .Nm is a background daemon which reads packets logged by .Xr pf 4 -to the packet logging interface -.Pa pflog0 +to a +.Xr pflog 4 +interface, normally +.Pa pflog0 , and writes the packets to a logfile (normally .Pa /var/log/pflog ) in @@ -81,7 +84,9 @@ temporarily uses the old snaplen to keep the log file consistent. tries to preserve the integrity of the log file against I/O errors. Furthermore, integrity of an existing log file is verified before appending. -If there is an invalid log file or an I/O error, logging is suspended until a +If there is an invalid log file or an I/O error, the log file is moved +out of the way and a new one is created. +If a new file cannot be created, logging is suspended until a .Dv SIGHUP or a .Dv SIGALRM @@ -101,11 +106,19 @@ If not specified, the default is 60 seconds. Log output filename. Default is .Pa /var/log/pflog . +.It Fl i Ar interface +Specifies the +.Xr pflog 4 +interface to use. +By default, +.Nm +will use +.Ar pflog0 . .It Fl s Ar snaplen Analyze at most the first .Ar snaplen -bytes of data from each packet rather than the default of 96. -The default of 96 is adequate for IP, ICMP, TCP, and UDP headers but may +bytes of data from each packet rather than the default of 116. +The default of 116 is adequate for IP, ICMP, TCP, and UDP headers but may truncate protocol information for other protocols. Other file parsers may desire a higher snaplen. 
.It Fl x @@ -129,6 +142,13 @@ Log specific tcp packets to a different log file with a large snaplen # pflogd -s 1600 -f suspicious.log port 80 and host evilhost .Ed .Pp +Log from another +.Xr pflog 4 +interface, excluding specific packets: +.Bd -literal -offset indent +# pflogd -i pflog3 -f network3.log "not (tcp and port 23)" +.Ed +.Pp Display binary logs: .Bd -literal -offset indent # tcpdump -n -e -ttt -r /var/log/pflog @@ -148,7 +168,7 @@ Tcpdump can restrict the output to packets logged on a specified interface, a rule number, a reason, a direction, an IP family or an action. .Pp -.Bl -tag -width "reason match " -compact +.Bl -tag -width "ruleset authpf " -compact .It ip Address family equals IPv4. .It ip6 @@ -157,12 +177,16 @@ Address family equals IPv6. Interface name equals "kue0". .It on kue0 Interface name equals "kue0". +.It ruleset authpf +Ruleset name equals "authpf". .It rulenum 10 Rule number equals 10. .It reason match Reason equals match. Also accepts "bad-offset", "fragment", "bad-timestamp", "short", -"normalize" and "memory". +"normalize", "memory", "congestion", "ip-option", "proto-cksum", +"state-mismatch", "state-insert", "state-limit", "src-limit", +and "synproxy". .It action pass Action equals pass. Also accepts "block". @@ -190,4 +214,6 @@ The command appeared in .Ox 3.0 . .Sh AUTHORS -Can Erkin Acar +.Nm +was written by +.An Can Erkin Acar Aq canacar@openbsd.org . 
diff --git a/contrib/pf/pflogd/pflogd.c b/contrib/pf/pflogd/pflogd.c index cc474e3..168deb1 100644 --- a/contrib/pf/pflogd/pflogd.c +++ b/contrib/pf/pflogd/pflogd.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pflogd.c,v 1.33 2005/02/09 12:09:30 henning Exp $ */ +/* $OpenBSD: pflogd.c,v 1.37 2006/10/26 13:34:47 jmc Exp $ */ /* * Copyright (c) 2001 Theo de Raadt @@ -73,7 +73,7 @@ int flush_buffer(FILE *); int init_pcap(void); void logmsg(int, const char *, ...); void purge_buffer(void); -int reset_dump(void); +int reset_dump(int); int scan_dump(FILE *, off_t); int set_snaplen(int); void set_suspended(int); @@ -82,6 +82,8 @@ void sig_close(int); void sig_hup(int); void usage(void); +static int try_reset_dump(int); + /* buffer must always be greater than snaplen */ static int bufpkt = 0; /* number of packets in buffer */ static int buflen = 0; /* allocated size of buffer */ @@ -100,8 +102,9 @@ set_suspended(int s) return; suspended = s; - setproctitle("[%s] -s %d -f %s", - suspended ? "suspended" : "running", cur_snaplen, filename); + setproctitle("[%s] -s %d -i %s -f %s", + suspended ? "suspended" : "running", + cur_snaplen, interface, filename); } char * @@ -147,8 +150,9 @@ logmsg(int pri, const char *message, ...) 
__dead void usage(void) { - fprintf(stderr, "usage: pflogd [-Dx] [-d delay] [-f filename] "); - fprintf(stderr, "[-s snaplen] [expression]\n"); + fprintf(stderr, "usage: pflogd [-Dx] [-d delay] [-f filename]"); + fprintf(stderr, " [-i interface] [-s snaplen]\n"); + fprintf(stderr, " [expression]\n"); exit(1); } @@ -228,7 +232,25 @@ set_snaplen(int snap) } int -reset_dump(void) +reset_dump(int nomove) +{ + int ret; + + for (;;) { + ret = try_reset_dump(nomove); + if (ret <= 0) + break; + } + + return (ret); +} + +/* + * tries to (re)open log file, nomove flag is used with -x switch + * returns 0: success, 1: retry (log moved), -1: error + */ +int +try_reset_dump(int nomove) { struct pcap_file_header hdr; struct stat st; @@ -250,26 +272,26 @@ reset_dump(void) */ fd = priv_open_log(); if (fd < 0) - return (1); + return (-1); fp = fdopen(fd, "a+"); if (fp == NULL) { - close(fd); logmsg(LOG_ERR, "Error: %s: %s", filename, strerror(errno)); - return (1); + close(fd); + return (-1); } if (fstat(fileno(fp), &st) == -1) { - fclose(fp); logmsg(LOG_ERR, "Error: %s: %s", filename, strerror(errno)); - return (1); + fclose(fp); + return (-1); } /* set FILE unbuffered, we do our own buffering */ if (setvbuf(fp, NULL, _IONBF, 0)) { - fclose(fp); logmsg(LOG_ERR, "Failed to set output buffers"); - return (1); + fclose(fp); + return (-1); } #define TCPDUMP_MAGIC 0xa1b2c3d4 @@ -277,11 +299,9 @@ reset_dump(void) if (st.st_size == 0) { if (snaplen != cur_snaplen) { logmsg(LOG_NOTICE, "Using snaplen %d", snaplen); - if (set_snaplen(snaplen)) { - fclose(fp); + if (set_snaplen(snaplen)) logmsg(LOG_WARNING, "Failed, using old settings"); - } } hdr.magic = TCPDUMP_MAGIC; hdr.version_major = PCAP_VERSION_MAJOR; @@ -293,11 +313,15 @@ reset_dump(void) if (fwrite((char *)&hdr, sizeof(hdr), 1, fp) != 1) { fclose(fp); - return (1); + return (-1); } } else if (scan_dump(fp, st.st_size)) { - /* XXX move file and continue? 
*/ fclose(fp); + if (nomove || priv_move_log()) { + logmsg(LOG_ERR, + "Invalid/incompatible log file, move it away"); + return (-1); + } return (1); } @@ -336,7 +360,6 @@ scan_dump(FILE *fp, off_t size) hdr.version_minor != PCAP_VERSION_MINOR || hdr.linktype != hpcap->linktype || hdr.snaplen > PFLOGD_MAXSNAPLEN) { - logmsg(LOG_ERR, "Invalid/incompatible log file, move it away"); return (1); } @@ -511,7 +534,7 @@ main(int argc, char **argv) closefrom(STDERR_FILENO + 1); - while ((ch = getopt(argc, argv, "Dxd:s:f:")) != -1) { + while ((ch = getopt(argc, argv, "Dxd:f:i:s:")) != -1) { switch (ch) { case 'D': Debug = 1; @@ -524,6 +547,9 @@ main(int argc, char **argv) case 'f': filename = optarg; break; + case 'i': + interface = optarg; + break; case 's': snaplen = strtonum(optarg, 0, PFLOGD_MAXSNAPLEN, &errstr); @@ -596,7 +622,7 @@ main(int argc, char **argv) bufpkt = 0; } - if (reset_dump()) { + if (reset_dump(Xflag) < 0) { if (Xflag) return (1); @@ -614,7 +640,7 @@ main(int argc, char **argv) if (gotsig_close) break; if (gotsig_hup) { - if (reset_dump()) { + if (reset_dump(0)) { logmsg(LOG_ERR, "Logging suspended: open error"); set_suspended(1); @@ -625,6 +651,8 @@ main(int argc, char **argv) if (gotsig_alrm) { if (dpcap) flush_buffer(dpcap); + else + gotsig_hup = 1; gotsig_alrm = 0; alarm(delay); } diff --git a/contrib/pf/pflogd/pflogd.h b/contrib/pf/pflogd/pflogd.h index 3baecb6..596e696 100644 --- a/contrib/pf/pflogd/pflogd.h +++ b/contrib/pf/pflogd/pflogd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pflogd.h,v 1.2 2004/01/15 20:15:14 canacar Exp $ */ +/* $OpenBSD: pflogd.h,v 1.3 2006/01/15 16:38:04 canacar Exp $ */ /* * Copyright (c) 2003 Can Erkin Acar @@ -37,6 +37,7 @@ void logmsg(int priority, const char *message, ...); int priv_init(void); int priv_set_snaplen(int snaplen); int priv_open_log(void); +int priv_move_log(void); pcap_t *pcap_open_live_fd(int fd, int snaplen, char *ebuf); void set_pcap_filter(void); diff --git a/contrib/pf/pflogd/privsep.c 
b/contrib/pf/pflogd/privsep.c index 33d6b9c..1139cb4 100644 --- a/contrib/pf/pflogd/privsep.c +++ b/contrib/pf/pflogd/privsep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: privsep.c,v 1.13 2004/12/22 09:21:02 otto Exp $ */ +/* $OpenBSD: privsep.c,v 1.16 2006/10/25 20:55:04 moritz Exp $ */ /* * Copyright (c) 2003 Can Erkin Acar @@ -16,7 +16,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#include <sys/ioctl.h> #include <sys/types.h> #include <sys/time.h> #include <sys/socket.h> @@ -28,6 +27,7 @@ #include <err.h> #include <errno.h> #include <fcntl.h> +#include <limits.h> #include <pcap.h> #include <pcap-int.h> #include <pwd.h> @@ -41,6 +41,7 @@ enum cmd_types { PRIV_SET_SNAPLEN, /* set the snaplength */ + PRIV_MOVE_LOG, /* move logfile away */ PRIV_OPEN_LOG /* open logfile for appending */ }; @@ -55,10 +56,8 @@ static int may_read(int, void *, size_t); static void must_read(int, void *, size_t); static void must_write(int, void *, size_t); static int set_snaplen(int snap); +static int move_log(const char *name); -/* bpf filter expression common to parent and child */ -extern char *filter; -extern char *errbuf; extern char *filename; extern pcap_t *hpcap; @@ -96,16 +95,12 @@ priv_init(void) err(1, "unable to chdir"); gidset[0] = pw->pw_gid; + if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) == -1) + err(1, "setresgid() failed"); if (setgroups(1, gidset) == -1) err(1, "setgroups() failed"); - if (setegid(pw->pw_gid) == -1) - err(1, "setegid() failed"); - if (setgid(pw->pw_gid) == -1) - err(1, "setgid() failed"); - if (seteuid(pw->pw_uid) == -1) - err(1, "seteuid() failed"); - if (setuid(pw->pw_uid) == -1) - err(1, "setuid() failed"); + if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) == -1) + err(1, "setresuid() failed"); close(socks[0]); priv_fd = socks[1]; return 0; @@ -159,6 +154,13 @@ priv_init(void) close(fd); break; + case PRIV_MOVE_LOG: + logmsg(LOG_DEBUG, + "[priv]: msg 
PRIV_MOVE_LOG received"); + ret = move_log(filename); + must_write(socks[0], &ret, sizeof(int)); + break; + default: logmsg(LOG_ERR, "[priv]: unknown command %d", cmd); _exit(1); @@ -182,6 +184,47 @@ set_snaplen(int snap) return 0; } +static int +move_log(const char *name) +{ + char ren[PATH_MAX]; + int len; + + for (;;) { + int fd; + + len = snprintf(ren, sizeof(ren), "%s.bad.%08x", + name, arc4random()); + if (len >= sizeof(ren)) { + logmsg(LOG_ERR, "[priv] new name too long"); + return (1); + } + + /* lock destinanion */ + fd = open(ren, O_CREAT|O_EXCL, 0); + if (fd >= 0) { + close(fd); + break; + } + /* if file exists, try another name */ + if (errno != EEXIST && errno != EINTR) { + logmsg(LOG_ERR, "[priv] failed to create new name: %s", + strerror(errno)); + return (1); + } + } + + if (rename(name, ren)) { + logmsg(LOG_ERR, "[priv] failed to rename %s to %s: %s", + name, ren, strerror(errno)); + return (1); + } + + logmsg(LOG_NOTICE, + "[priv]: log file %s moved to %s", name, ren); + + return (0); +} /* * send the snaplength to privileged process @@ -223,6 +266,21 @@ priv_open_log(void) return (fd); } +/* Move-away and reopen log-file */ +int +priv_move_log(void) +{ + int cmd, ret; + + if (priv_fd < 0) + errx(1, "%s: called from privileged portion\n", __func__); + + cmd = PRIV_MOVE_LOG; + must_write(priv_fd, &cmd, sizeof(int)); + must_read(priv_fd, &ret, sizeof(int)); + + return (ret); +} /* If priv parent gets a TERM or HUP, pass it through to child instead */ static void diff --git a/contrib/pf/tftp-proxy/filter.c b/contrib/pf/tftp-proxy/filter.c new file mode 100644 index 0000000..cd6ce3c --- /dev/null +++ b/contrib/pf/tftp-proxy/filter.c @@ -0,0 +1,397 @@ +/* $OpenBSD: filter.c,v 1.1 2005/12/28 19:07:07 jcs Exp $ */ + +/* + * Copyright (c) 2004, 2005 Camiel Dobbelaar, <cd@sentia.nl> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice 
and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <syslog.h> + +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <net/if.h> +#include <net/pfvar.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "filter.h" + +/* From netinet/in.h, but only _KERNEL_ gets them. 
*/ +#define satosin(sa) ((struct sockaddr_in *)(sa)) +#define satosin6(sa) ((struct sockaddr_in6 *)(sa)) + +enum { TRANS_FILTER = 0, TRANS_NAT, TRANS_RDR, TRANS_SIZE }; + +int prepare_rule(u_int32_t, int, struct sockaddr *, struct sockaddr *, + u_int16_t, u_int8_t); +int server_lookup4(struct sockaddr_in *, struct sockaddr_in *, + struct sockaddr_in *, u_int8_t); +int server_lookup6(struct sockaddr_in6 *, struct sockaddr_in6 *, + struct sockaddr_in6 *, u_int8_t); + +static struct pfioc_pooladdr pfp; +static struct pfioc_rule pfr; +static struct pfioc_trans pft; +static struct pfioc_trans_e pfte[TRANS_SIZE]; +static int dev, rule_log; +static char *qname; + +int +add_filter(u_int32_t id, u_int8_t dir, struct sockaddr *src, + struct sockaddr *dst, u_int16_t d_port, u_int8_t proto) +{ + if (!src || !dst || !d_port || !proto) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_FILTER, src, dst, d_port, proto) == -1) + return (-1); + + pfr.rule.direction = dir; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + return (-1); + + return (0); +} + +int +add_nat(u_int32_t id, struct sockaddr *src, struct sockaddr *dst, + u_int16_t d_port, struct sockaddr *nat, u_int16_t nat_range_low, + u_int16_t nat_range_high, u_int8_t proto) +{ + if (!src || !dst || !d_port || !nat || !nat_range_low || !proto || + (src->sa_family != nat->sa_family)) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_NAT, src, dst, d_port, proto) == -1) + return (-1); + + if (nat->sa_family == AF_INET) { + memcpy(&pfp.addr.addr.v.a.addr.v4, + &satosin(nat)->sin_addr.s_addr, 4); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 4); + } else { + memcpy(&pfp.addr.addr.v.a.addr.v6, + &satosin6(nat)->sin6_addr.s6_addr, 16); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 16); + } + if (ioctl(dev, DIOCADDADDR, &pfp) == -1) + return (-1); + + pfr.rule.rpool.proxy_port[0] = nat_range_low; + pfr.rule.rpool.proxy_port[1] = nat_range_high; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + 
return (-1); + + return (0); +} + +int +add_rdr(u_int32_t id, struct sockaddr *src, struct sockaddr *dst, + u_int16_t d_port, struct sockaddr *rdr, u_int16_t rdr_port, u_int8_t proto) +{ + if (!src || !dst || !d_port || !rdr || !rdr_port || !proto || + (src->sa_family != rdr->sa_family)) { + errno = EINVAL; + return (-1); + } + + if (prepare_rule(id, PF_RULESET_RDR, src, dst, d_port, proto) == -1) + return (-1); + + if (rdr->sa_family == AF_INET) { + memcpy(&pfp.addr.addr.v.a.addr.v4, + &satosin(rdr)->sin_addr.s_addr, 4); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 4); + } else { + memcpy(&pfp.addr.addr.v.a.addr.v6, + &satosin6(rdr)->sin6_addr.s6_addr, 16); + memset(&pfp.addr.addr.v.a.mask.addr8, 255, 16); + } + if (ioctl(dev, DIOCADDADDR, &pfp) == -1) + return (-1); + + pfr.rule.rpool.proxy_port[0] = rdr_port; + if (ioctl(dev, DIOCADDRULE, &pfr) == -1) + return (-1); + + return (0); +} + +int +do_commit(void) +{ + if (ioctl(dev, DIOCXCOMMIT, &pft) == -1) + return (-1); + + return (0); +} + +int +do_rollback(void) +{ + if (ioctl(dev, DIOCXROLLBACK, &pft) == -1) + return (-1); + + return (0); +} + +void +init_filter(char *opt_qname, int opt_verbose) +{ + struct pf_status status; + + qname = opt_qname; + + if (opt_verbose == 1) + rule_log = PF_LOG; + else if (opt_verbose == 2) + rule_log = PF_LOG_ALL; + + dev = open("/dev/pf", O_RDWR); + if (dev == -1) { + syslog(LOG_ERR, "can't open /dev/pf"); + exit(1); + } + if (ioctl(dev, DIOCGETSTATUS, &status) == -1) { + syslog(LOG_ERR, "DIOCGETSTATUS"); + exit(1); + } + if (!status.running) { + syslog(LOG_ERR, "pf is disabled"); + exit(1); + } +} + +int +prepare_commit(u_int32_t id) +{ + char an[PF_ANCHOR_NAME_SIZE]; + int i; + + memset(&pft, 0, sizeof pft); + pft.size = TRANS_SIZE; + pft.esize = sizeof pfte[0]; + pft.array = pfte; + + snprintf(an, PF_ANCHOR_NAME_SIZE, "%s/%d.%d", FTP_PROXY_ANCHOR, + getpid(), id); + for (i = 0; i < TRANS_SIZE; i++) { + memset(&pfte[i], 0, sizeof pfte[0]); + strlcpy(pfte[i].anchor, an, 
PF_ANCHOR_NAME_SIZE); + switch (i) { + case TRANS_FILTER: + pfte[i].rs_num = PF_RULESET_FILTER; + break; + case TRANS_NAT: + pfte[i].rs_num = PF_RULESET_NAT; + break; + case TRANS_RDR: + pfte[i].rs_num = PF_RULESET_RDR; + break; + default: + errno = EINVAL; + return (-1); + } + } + + if (ioctl(dev, DIOCXBEGIN, &pft) == -1) + return (-1); + + return (0); +} + +int +prepare_rule(u_int32_t id, int rs_num, struct sockaddr *src, + struct sockaddr *dst, u_int16_t d_port, u_int8_t proto) +{ + char an[PF_ANCHOR_NAME_SIZE]; + + if ((src->sa_family != AF_INET && src->sa_family != AF_INET6) || + (src->sa_family != dst->sa_family)) { + errno = EPROTONOSUPPORT; + return (-1); + } + + memset(&pfp, 0, sizeof pfp); + memset(&pfr, 0, sizeof pfr); + snprintf(an, PF_ANCHOR_NAME_SIZE, "%s/%d.%d", FTP_PROXY_ANCHOR, + getpid(), id); + strlcpy(pfp.anchor, an, PF_ANCHOR_NAME_SIZE); + strlcpy(pfr.anchor, an, PF_ANCHOR_NAME_SIZE); + + switch (rs_num) { + case PF_RULESET_FILTER: + pfr.ticket = pfte[TRANS_FILTER].ticket; + break; + case PF_RULESET_NAT: + pfr.ticket = pfte[TRANS_NAT].ticket; + break; + case PF_RULESET_RDR: + pfr.ticket = pfte[TRANS_RDR].ticket; + break; + default: + errno = EINVAL; + return (-1); + } + if (ioctl(dev, DIOCBEGINADDRS, &pfp) == -1) + return (-1); + pfr.pool_ticket = pfp.ticket; + + /* Generic for all rule types. 
*/ + pfr.rule.af = src->sa_family; + pfr.rule.proto = proto; + pfr.rule.src.addr.type = PF_ADDR_ADDRMASK; + pfr.rule.dst.addr.type = PF_ADDR_ADDRMASK; + if (src->sa_family == AF_INET) { + memcpy(&pfr.rule.src.addr.v.a.addr.v4, + &satosin(src)->sin_addr.s_addr, 4); + memset(&pfr.rule.src.addr.v.a.mask.addr8, 255, 4); + memcpy(&pfr.rule.dst.addr.v.a.addr.v4, + &satosin(dst)->sin_addr.s_addr, 4); + memset(&pfr.rule.dst.addr.v.a.mask.addr8, 255, 4); + } else { + memcpy(&pfr.rule.src.addr.v.a.addr.v6, + &satosin6(src)->sin6_addr.s6_addr, 16); + memset(&pfr.rule.src.addr.v.a.mask.addr8, 255, 16); + memcpy(&pfr.rule.dst.addr.v.a.addr.v6, + &satosin6(dst)->sin6_addr.s6_addr, 16); + memset(&pfr.rule.dst.addr.v.a.mask.addr8, 255, 16); + } + pfr.rule.dst.port_op = PF_OP_EQ; + pfr.rule.dst.port[0] = htons(d_port); + + switch (rs_num) { + case PF_RULESET_FILTER: + /* + * pass quick [log] inet[6] proto tcp \ + * from $src to $dst port = $d_port flags S/SAFR keep state + * (max 1) [queue qname] + */ + pfr.rule.action = PF_PASS; + pfr.rule.quick = 1; + pfr.rule.log = rule_log; + pfr.rule.keep_state = 1; + pfr.rule.flags = (proto == IPPROTO_TCP ? TH_SYN : NULL); + pfr.rule.flagset = (proto == IPPROTO_TCP ? 
+ (TH_SYN|TH_ACK|TH_FIN|TH_RST) : NULL); + pfr.rule.max_states = 1; + if (qname != NULL) + strlcpy(pfr.rule.qname, qname, sizeof pfr.rule.qname); + break; + case PF_RULESET_NAT: + /* + * nat inet[6] proto tcp from $src to $dst port $d_port -> $nat + */ + pfr.rule.action = PF_NAT; + break; + case PF_RULESET_RDR: + /* + * rdr inet[6] proto tcp from $src to $dst port $d_port -> $rdr + */ + pfr.rule.action = PF_RDR; + break; + default: + errno = EINVAL; + return (-1); + } + + return (0); +} + +int +server_lookup(struct sockaddr *client, struct sockaddr *proxy, + struct sockaddr *server, u_int8_t proto) +{ + if (client->sa_family == AF_INET) + return (server_lookup4(satosin(client), satosin(proxy), + satosin(server), proto)); + + if (client->sa_family == AF_INET6) + return (server_lookup6(satosin6(client), satosin6(proxy), + satosin6(server), proto)); + + errno = EPROTONOSUPPORT; + return (-1); +} + +int +server_lookup4(struct sockaddr_in *client, struct sockaddr_in *proxy, + struct sockaddr_in *server, u_int8_t proto) +{ + struct pfioc_natlook pnl; + + memset(&pnl, 0, sizeof pnl); + pnl.direction = PF_OUT; + pnl.af = AF_INET; + pnl.proto = proto; + memcpy(&pnl.saddr.v4, &client->sin_addr.s_addr, sizeof pnl.saddr.v4); + memcpy(&pnl.daddr.v4, &proxy->sin_addr.s_addr, sizeof pnl.daddr.v4); + pnl.sport = client->sin_port; + pnl.dport = proxy->sin_port; + + if (ioctl(dev, DIOCNATLOOK, &pnl) == -1) + return (-1); + + memset(server, 0, sizeof(struct sockaddr_in)); + server->sin_len = sizeof(struct sockaddr_in); + server->sin_family = AF_INET; + memcpy(&server->sin_addr.s_addr, &pnl.rdaddr.v4, + sizeof server->sin_addr.s_addr); + server->sin_port = pnl.rdport; + + return (0); +} + +int +server_lookup6(struct sockaddr_in6 *client, struct sockaddr_in6 *proxy, + struct sockaddr_in6 *server, u_int8_t proto) +{ + struct pfioc_natlook pnl; + + memset(&pnl, 0, sizeof pnl); + pnl.direction = PF_OUT; + pnl.af = AF_INET6; + pnl.proto = proto; + memcpy(&pnl.saddr.v6, 
&client->sin6_addr.s6_addr, sizeof pnl.saddr.v6); + memcpy(&pnl.daddr.v6, &proxy->sin6_addr.s6_addr, sizeof pnl.daddr.v6); + pnl.sport = client->sin6_port; + pnl.dport = proxy->sin6_port; + + if (ioctl(dev, DIOCNATLOOK, &pnl) == -1) + return (-1); + + memset(server, 0, sizeof(struct sockaddr_in6)); + server->sin6_len = sizeof(struct sockaddr_in6); + server->sin6_family = AF_INET6; + memcpy(&server->sin6_addr.s6_addr, &pnl.rdaddr.v6, + sizeof server->sin6_addr); + server->sin6_port = pnl.rdport; + + return (0); +} diff --git a/contrib/pf/tftp-proxy/filter.h b/contrib/pf/tftp-proxy/filter.h new file mode 100644 index 0000000..04d43f7 --- /dev/null +++ b/contrib/pf/tftp-proxy/filter.h @@ -0,0 +1,32 @@ +/* $OpenBSD: filter.h,v 1.1 2005/12/28 19:07:07 jcs Exp $ */ + +/* + * Copyright (c) 2004, 2005 Camiel Dobbelaar, <cd@sentia.nl> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#define FTP_PROXY_ANCHOR "tftp-proxy" + +int add_filter(u_int32_t, u_int8_t, struct sockaddr *, struct sockaddr *, + u_int16_t, u_int8_t); +int add_nat(u_int32_t, struct sockaddr *, struct sockaddr *, u_int16_t, + struct sockaddr *, u_int16_t, u_int16_t, u_int8_t); +int add_rdr(u_int32_t, struct sockaddr *, struct sockaddr *, u_int16_t, + struct sockaddr *, u_int16_t, u_int8_t); +int do_commit(void); +int do_rollback(void); +void init_filter(char *, int); +int prepare_commit(u_int32_t); +int server_lookup(struct sockaddr *, struct sockaddr *, struct sockaddr *, + u_int8_t); diff --git a/contrib/pf/tftp-proxy/tftp-proxy.8 b/contrib/pf/tftp-proxy/tftp-proxy.8 new file mode 100644 index 0000000..b9098ef --- /dev/null +++ b/contrib/pf/tftp-proxy/tftp-proxy.8 @@ -0,0 +1,140 @@ +.\" $OpenBSD: tftp-proxy.8,v 1.1 2005/12/28 19:07:07 jcs Exp $ +.\" +.\" Copyright (c) 2005 joshua stein <jcs@openbsd.org> +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. The name of the author may not be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd November 28, 2005 +.Dt TFTP-PROXY 8 +.Os +.Sh NAME +.Nm tftp-proxy +.Nd Internet Trivial File Transfer Protocol proxy +.Sh SYNOPSIS +.Nm tftp-proxy +.Op Fl v +.Op Fl w Ar transwait +.Sh DESCRIPTION +.Nm +is a proxy for the Internet Trivial File Transfer Protocol invoked by +the +.Xr inetd 8 +internet server. +TFTP connections should be redirected to the proxy using the +.Xr pf 4 +.Ar rdr +command, after which the proxy connects to the server on behalf of +the client. +.Pp +The proxy establishes a +.Xr pf 4 +.Ar rdr +rule using the +.Ar anchor +facility to rewrite packets between the client and the server. +Once the rule is established, +.Nm +forwards the initial request from the client to the server to begin the +transfer. +After +.Ar transwait +seconds, the +.Xr pf 4 +NAT state is assumed to have been established and the +.Ar rdr +rule is deleted and the program exits. +Once the transfer between the client and the server is completed, the +NAT state will naturally expire. +.Pp +Assuming the TFTP command request is from $client to $server, the +proxy connected to the server using the $proxy source address, and +$port is negotiated, +.Nm +adds the following rule to the anchor: +.Bd -literal -offset indent +rdr proto udp from $server to $proxy port $port -\*(Gt $client +.Ed +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl v +Log the connection and request information to +.Xr syslogd 8 . 
+.It Fl w Ar transwait +Number of seconds to wait for the data transmission to begin before +removing the +.Xr pf 4 +.Ar rdr +rule. +The default is 2 seconds. +.El +.Sh CONFIGURATION +To make use of the proxy, +.Xr pf.conf 5 +needs the following rules. +The anchors are mandatory. +Adjust the rules as needed for your configuration. +.Pp +In the NAT section: +.Bd -literal -offset indent +nat on $ext_if from $int_if -\*(Gt ($ext_if:0) + +no nat on $ext_if to port tftp + +rdr-anchor "tftp-proxy/*" +rdr on $int_if proto udp from $lan to any port tftp -\*(Gt \e + 127.0.0.1 port 6969 +.Ed +.Pp +In the filter section, an anchor must be added to hold the pass rules: +.Bd -literal -offset indent +anchor "tftp-proxy/*" +.Ed +.Pp +.Xr inetd 8 +must be configured to spawn the proxy on the port that packets are +being forwarded to by +.Xr pf 4 . +An example +.Xr inetd.conf 5 +entry follows: +.Bd -literal -offset indent +127.0.0.1:6969 dgram udp wait root \e + /usr/libexec/tftp-proxy tftp-proxy +.Ed +.Sh SEE ALSO +.Xr tftp 1 , +.Xr pf 4 , +.Xr pf.conf 5 , +.Xr ftp-proxy 8 , +.Xr inetd 8 , +.Xr syslogd 8 , +.Xr tftpd 8 +.Sh CAVEATS +.Nm +chroots to +.Pa /var/empty +and changes to user +.Dq proxy +to drop privileges. diff --git a/contrib/pf/tftp-proxy/tftp-proxy.c b/contrib/pf/tftp-proxy/tftp-proxy.c new file mode 100644 index 0000000..18d3323 --- /dev/null +++ b/contrib/pf/tftp-proxy/tftp-proxy.c @@ -0,0 +1,393 @@ +/* $OpenBSD: tftp-proxy.c,v 1.2 2006/12/20 03:33:38 joel Exp $ + * + * Copyright (c) 2005 DLS Internet Services + * Copyright (c) 2004, 2005 Camiel Dobbelaar, <cd@sentia.nl> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/uio.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <arpa/inet.h> +#include <arpa/tftp.h> +#include <sys/socket.h> +#include <net/if.h> +#include <net/pfvar.h> + +#include <errno.h> +#include <pwd.h> +#include <stdio.h> +#include <syslog.h> +#include <string.h> +#include <stdlib.h> + +#include "filter.h" + +#define CHROOT_DIR "/var/empty" +#define NOPRIV_USER "proxy" + +#define PF_NAT_PROXY_PORT_LOW 50001 +#define PF_NAT_PROXY_PORT_HIGH 65535 + +#define DEFTRANSWAIT 2 +#define NTOP_BUFS 4 +#define PKTSIZE SEGSIZE+4 + +const char *opcode(int); +const char *sock_ntop(struct sockaddr *); +u_int16_t pick_proxy_port(void); +static void usage(void); + +extern char *__progname; +char ntop_buf[NTOP_BUFS][INET6_ADDRSTRLEN]; +int verbose = 0; + +int +main(int argc, char *argv[]) +{ + int c, fd = 0, on = 1, out_fd = 0, peer, reqsize = 0; + int transwait = DEFTRANSWAIT; + char *p; + struct tftphdr *tp; + struct passwd *pw; + + char cbuf[CMSG_SPACE(sizeof(struct sockaddr_storage))]; + char req[PKTSIZE]; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + + struct sockaddr_storage from, proxy, server, proxy_to_server, s_in; + struct sockaddr_in sock_out; + socklen_t j; + in_port_t bindport; + + openlog(__progname, LOG_PID | LOG_NDELAY, LOG_DAEMON); + + while ((c = getopt(argc, argv, "vw:")) != -1) + switch (c) { + case 'v': + verbose++; + break; + case 'w': + transwait = strtoll(optarg, &p, 10); + if (transwait < 1) { + syslog(LOG_ERR, "invalid -w value"); + exit(1); + } + break; + default: + usage(); + break; + } + + /* open /dev/pf */ + init_filter(NULL, verbose); + + tzset(); + + pw = getpwnam(NOPRIV_USER); + if (!pw) { + syslog(LOG_ERR, "no such user %s: %m", NOPRIV_USER); + exit(1); + } + if (chroot(CHROOT_DIR) || chdir("/")) { + syslog(LOG_ERR, "chroot %s: %m", CHROOT_DIR); + exit(1); + } + if (setgroups(1, &pw->pw_gid) || + setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || + 
setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) { + syslog(LOG_ERR, "can't revoke privs: %m"); + exit(1); + } + + /* non-blocking io */ + if (ioctl(fd, FIONBIO, &on) < 0) { + syslog(LOG_ERR, "ioctl(FIONBIO): %m"); + exit(1); + } + + if (setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &on, sizeof(on)) == -1) { + syslog(LOG_ERR, "setsockopt(IP_RECVDSTADDR): %m"); + exit(1); + } + + j = sizeof(s_in); + if (getsockname(fd, (struct sockaddr *)&s_in, &j) == -1) { + syslog(LOG_ERR, "getsockname: %m"); + exit(1); + } + + bindport = ((struct sockaddr_in *)&s_in)->sin_port; + + /* req will be pushed back out at the end, unchanged */ + j = sizeof(from); + if ((reqsize = recvfrom(fd, req, sizeof(req), MSG_PEEK, + (struct sockaddr *)&from, &j)) < 0) { + syslog(LOG_ERR, "recvfrom: %m"); + exit(1); + } + + bzero(&msg, sizeof(msg)); + iov.iov_base = req; + iov.iov_len = sizeof(req); + msg.msg_name = &from; + msg.msg_namelen = sizeof(from); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = CMSG_LEN(sizeof(struct sockaddr_storage)); + + if (recvmsg(fd, &msg, 0) < 0) { + syslog(LOG_ERR, "recvmsg: %m"); + exit(1); + } + + close(fd); + close(1); + + peer = socket(from.ss_family, SOCK_DGRAM, 0); + if (peer < 0) { + syslog(LOG_ERR, "socket: %m"); + exit(1); + } + memset(&s_in, 0, sizeof(s_in)); + s_in.ss_family = from.ss_family; + s_in.ss_len = from.ss_len; + + /* get local address if possible */ + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == IPPROTO_IP && + cmsg->cmsg_type == IP_RECVDSTADDR) { + memcpy(&((struct sockaddr_in *)&s_in)->sin_addr, + CMSG_DATA(cmsg), sizeof(struct in_addr)); + break; + } + } + + if (bind(peer, (struct sockaddr *)&s_in, s_in.ss_len) < 0) { + syslog(LOG_ERR, "bind: %m"); + exit(1); + } + if (connect(peer, (struct sockaddr *)&from, from.ss_len) < 0) { + syslog(LOG_ERR, "connect: %m"); + exit(1); + } + + tp = (struct tftphdr *)req; + if (!(ntohs(tp->th_opcode) == 
RRQ || ntohs(tp->th_opcode) == WRQ)) { + /* not a tftp request, bail */ + if (verbose) { + syslog(LOG_WARNING, "not a valid tftp request"); + exit(1); + } else + /* exit 0 so inetd doesn't log anything */ + exit(0); + } + + j = sizeof(struct sockaddr_storage); + if (getsockname(fd, (struct sockaddr *)&proxy, &j) == -1) { + syslog(LOG_ERR, "getsockname: %m"); + exit(1); + } + + ((struct sockaddr_in *)&proxy)->sin_port = bindport; + + /* find the un-rdr'd server and port the client wanted */ + if (server_lookup((struct sockaddr *)&from, + (struct sockaddr *)&proxy, (struct sockaddr *)&server, + IPPROTO_UDP) != 0) { + syslog(LOG_ERR, "pf connection lookup failed (no rdr?)"); + exit(1); + } + + /* establish a new outbound connection to the remote server */ + if ((out_fd = socket(((struct sockaddr *)&from)->sa_family, + SOCK_DGRAM, IPPROTO_UDP)) < 0) { + syslog(LOG_ERR, "couldn't create new socket"); + exit(1); + } + + bzero((char *)&sock_out, sizeof(sock_out)); + sock_out.sin_family = from.ss_family; + sock_out.sin_port = htons(pick_proxy_port()); + if (bind(out_fd, (struct sockaddr *)&sock_out, sizeof(sock_out)) < 0) { + syslog(LOG_ERR, "couldn't bind to new socket: %m"); + exit(1); + } + + if (connect(out_fd, (struct sockaddr *)&server, + ((struct sockaddr *)&server)->sa_len) < 0 && errno != EINPROGRESS) { + syslog(LOG_ERR, "couldn't connect to remote server: %m"); + exit(1); + } + + j = sizeof(struct sockaddr_storage); + if ((getsockname(out_fd, (struct sockaddr *)&proxy_to_server, + &j)) < 0) { + syslog(LOG_ERR, "getsockname: %m"); + exit(1); + } + + if (verbose) + syslog(LOG_INFO, "%s:%d -> %s:%d/%s:%d -> %s:%d \"%s %s\"", + sock_ntop((struct sockaddr *)&from), + ntohs(((struct sockaddr_in *)&from)->sin_port), + sock_ntop((struct sockaddr *)&proxy), + ntohs(((struct sockaddr_in *)&proxy)->sin_port), + sock_ntop((struct sockaddr *)&proxy_to_server), + ntohs(((struct sockaddr_in *)&proxy_to_server)->sin_port), + sock_ntop((struct sockaddr *)&server), + 
ntohs(((struct sockaddr_in *)&server)->sin_port), + opcode(ntohs(tp->th_opcode)), + tp->th_stuff); + + /* get ready to add rdr and pass rules */ + if (prepare_commit(1) == -1) { + syslog(LOG_ERR, "couldn't prepare pf commit"); + exit(1); + } + + /* rdr from server to us on our random port -> client on its port */ + if (add_rdr(1, (struct sockaddr *)&server, + (struct sockaddr *)&proxy_to_server, ntohs(sock_out.sin_port), + (struct sockaddr *)&from, + ntohs(((struct sockaddr_in *)&from)->sin_port), + IPPROTO_UDP) == -1) { + syslog(LOG_ERR, "couldn't add rdr"); + exit(1); + } + + /* explicitly allow the packets to return back to the client (which pf + * will see post-rdr) */ + if (add_filter(1, PF_IN, (struct sockaddr *)&server, + (struct sockaddr *)&from, + ntohs(((struct sockaddr_in *)&from)->sin_port), + IPPROTO_UDP) == -1) { + syslog(LOG_ERR, "couldn't add pass in"); + exit(1); + } + if (add_filter(1, PF_OUT, (struct sockaddr *)&server, + (struct sockaddr *)&from, + ntohs(((struct sockaddr_in *)&from)->sin_port), + IPPROTO_UDP) == -1) { + syslog(LOG_ERR, "couldn't add pass out"); + exit(1); + } + + /* and just in case, to pass out from us to the server */ + if (add_filter(1, PF_OUT, (struct sockaddr *)&proxy_to_server, + (struct sockaddr *)&server, + ntohs(((struct sockaddr_in *)&server)->sin_port), + IPPROTO_UDP) == -1) { + syslog(LOG_ERR, "couldn't add pass out"); + exit(1); + } + + if (do_commit() == -1) { + syslog(LOG_ERR, "couldn't commit pf rules"); + exit(1); + } + + /* forward the initial tftp request and start the insanity */ + if (send(out_fd, tp, reqsize, 0) < 0) { + syslog(LOG_ERR, "couldn't forward tftp packet: %m"); + exit(1); + } + + /* allow the transfer to start to establish a state */ + sleep(transwait); + + /* delete our rdr rule and clean up */ + prepare_commit(1); + do_commit(); + + return(0); +} + +const char * +opcode(int code) +{ + static char str[6]; + + switch (code) { + case 1: + (void)snprintf(str, sizeof(str), "RRQ"); + break; + case 
2: + (void)snprintf(str, sizeof(str), "WRQ"); + break; + default: + (void)snprintf(str, sizeof(str), "(%d)", code); + break; + } + + return (str); +} + +const char * +sock_ntop(struct sockaddr *sa) +{ + static int n = 0; + + /* Cycle to next buffer. */ + n = (n + 1) % NTOP_BUFS; + ntop_buf[n][0] = '\0'; + + if (sa->sa_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + + return (inet_ntop(AF_INET, &sin->sin_addr, ntop_buf[n], + sizeof ntop_buf[0])); + } + + if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + + return (inet_ntop(AF_INET6, &sin6->sin6_addr, ntop_buf[n], + sizeof ntop_buf[0])); + } + + return (NULL); +} + +u_int16_t +pick_proxy_port(void) +{ + return (IPPORT_HIFIRSTAUTO + (arc4random() % + (IPPORT_HILASTAUTO - IPPORT_HIFIRSTAUTO))); +} + +static void +usage(void) +{ + syslog(LOG_ERR, "usage: %s [-v] [-w transwait]", __progname); + exit(1); +} |