From 153fa4f49e7ae4d39851638cfb970d383c0f8b91 Mon Sep 17 00:00:00 2001 From: luigi Date: Tue, 23 Mar 2010 09:58:59 +0000 Subject: MFC of a large number of ipfw and dummynet fixes and enhancements done in CURRENT over the last 4 months. HEAD and RELENG_8 are almost in sync now for ipfw, dummynet the pfil hooks and related components. Among the most noticeable changes: - r200855 more efficient lookup of skipto rules, and remove O(N) blocks from critical sections in the kernel; - r204591 large restructuring of the dummynet module, with support for multiple scheduling algorithms (4 available so far) See the original commit logs for details. Changes in the kernel/userland ABI should be harmless because the kernel is able to understand previous requests from RELENG_8 and RELENG_7. For this reason, this changeset would be applicable to RELENG_7 as well, but i am not sure if it is worthwhile. --- sbin/ipfw/Makefile | 1 + sbin/ipfw/altq.c | 1 + sbin/ipfw/dummynet.c | 965 +++++++++++++++++++++++++++++++++------------------ sbin/ipfw/ipfw.8 | 264 ++++++++++---- sbin/ipfw/ipfw2.c | 365 +++++++++++-------- sbin/ipfw/ipfw2.h | 31 +- sbin/ipfw/main.c | 158 ++++++--- 7 files changed, 1177 insertions(+), 608 deletions(-) (limited to 'sbin') diff --git a/sbin/ipfw/Makefile b/sbin/ipfw/Makefile index c09ebca..b25f38c 100644 --- a/sbin/ipfw/Makefile +++ b/sbin/ipfw/Makefile @@ -3,6 +3,7 @@ PROG= ipfw SRCS= ipfw2.c dummynet.c ipv6.c main.c nat.c altq.c WARNS?= 2 +DPADD= ${LIBUTIL} LDADD= -lutil MAN= ipfw.8 diff --git a/sbin/ipfw/altq.c b/sbin/ipfw/altq.c index b00a1e0..8cf19e5 100644 --- a/sbin/ipfw/altq.c +++ b/sbin/ipfw/altq.c @@ -39,6 +39,7 @@ #include /* IFNAMSIZ */ #include +#include /* in_addr */ #include /* diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c index 9e68e65..eb6547a 100644 --- a/sbin/ipfw/dummynet.c +++ b/sbin/ipfw/dummynet.c @@ -1,10 +1,5 @@ /* - * Copyright (c) 2002-2003 Luigi Rizzo - * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp - * 
Copyright (c) 1994 Ugen J.S.Antsilevich - * - * Idea and grammar partially left from: - * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 2002-2003,2010 Luigi Rizzo * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. @@ -15,8 +10,6 @@ * * This software is provided ``AS IS'' without any warranties of any kind. * - * NEW command line interface for IP firewall facility - * * $FreeBSD$ * * dummynet support @@ -24,7 +17,6 @@ #include #include -#include /* XXX there are several sysctl leftover here */ #include @@ -46,6 +38,7 @@ #include #include /* inet_ntoa */ + static struct _s_x dummynet_params[] = { { "plr", TOK_PLR }, { "noerror", TOK_NOERROR }, @@ -56,27 +49,59 @@ static struct _s_x dummynet_params[] = { { "src-port", TOK_SRCPORT }, { "proto", TOK_PROTO }, { "weight", TOK_WEIGHT }, + { "lmax", TOK_LMAX }, + { "maxlen", TOK_LMAX }, { "all", TOK_ALL }, - { "mask", TOK_MASK }, + { "mask", TOK_MASK }, /* alias for both */ + { "sched_mask", TOK_SCHED_MASK }, + { "flow_mask", TOK_FLOW_MASK }, { "droptail", TOK_DROPTAIL }, { "red", TOK_RED }, { "gred", TOK_GRED }, { "bw", TOK_BW }, { "bandwidth", TOK_BW }, { "delay", TOK_DELAY }, + { "link", TOK_LINK }, { "pipe", TOK_PIPE }, { "queue", TOK_QUEUE }, + { "flowset", TOK_FLOWSET }, + { "sched", TOK_SCHED }, + { "pri", TOK_PRI }, + { "priority", TOK_PRI }, + { "type", TOK_TYPE }, { "flow-id", TOK_FLOWID}, { "dst-ipv6", TOK_DSTIP6}, { "dst-ip6", TOK_DSTIP6}, { "src-ipv6", TOK_SRCIP6}, { "src-ip6", TOK_SRCIP6}, - { "profile", TOK_PIPE_PROFILE}, + { "profile", TOK_PROFILE}, { "burst", TOK_BURST}, { "dummynet-params", TOK_NULL }, { NULL, 0 } /* terminator */ }; +#define O_NEXT(p, len) ((void *)((char *)p + len)) + +static void +oid_fill(struct dn_id *oid, int len, int type, uintptr_t id) +{ + oid->len = len; + oid->type = type; + oid->subtype = 0; + oid->id = id; +} + +/* make room in the buffer and move the pointer forward */ +static void 
* +o_next(struct dn_id **o, int len, int type) +{ + struct dn_id *ret = *o; + oid_fill(ret, len, type, 0); + *o = O_NEXT(*o, len); + return ret; +} + +#if 0 static int sort_q(void *arg, const void *pa, const void *pb) { @@ -108,117 +133,84 @@ sort_q(void *arg, const void *pa, const void *pb) res = 1; return (int)(rev ? res : -res); } +#endif +/* print a mask and header for the subsequent list of flows */ static void -list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q) +print_mask(struct ipfw_flow_id *id) +{ + if (!IS_IP6_FLOW_ID(id)) { + printf(" " + "mask: %s 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", + id->extra ? "queue," : "", + id->proto, + id->src_ip, id->src_port, + id->dst_ip, id->dst_port); + + printf("BKT Prot ___Source IP/port____ " + "____Dest. IP/port____ " + "Tot_pkt/bytes Pkt/Byte Drp\n"); + } else { + char buf[255]; + printf("\n mask: %sproto: 0x%02x, flow_id: 0x%08x, ", + id->extra ? "queue," : "", + id->proto, id->flow_id6); + inet_ntop(AF_INET6, &(id->src_ip6), buf, sizeof(buf)); + printf("%s/0x%04x -> ", buf, id->src_port); + inet_ntop(AF_INET6, &(id->dst_ip6), buf, sizeof(buf)); + printf("%s/0x%04x\n", buf, id->dst_port); + + printf("BKT ___Prot___ _flow-id_ " + "______________Source IPv6/port_______________ " + "_______________Dest. 
IPv6/port_______________ " + "Tot_pkt/bytes Pkt/Byte Drp\n"); + } +} + +static void +list_flow(struct dn_flow *ni) { - int l; - int index_printed, indexes = 0; char buff[255]; struct protoent *pe; + struct in_addr ina; + struct ipfw_flow_id *id = &ni->fid; - if (fs->rq_elements == 0) - return; - - if (co.do_sort != 0) - qsort_r(q, fs->rq_elements, sizeof *q, NULL, sort_q); - - /* Print IPv4 flows */ - index_printed = 0; - for (l = 0; l < fs->rq_elements; l++) { - struct in_addr ina; - + pe = getprotobynumber(id->proto); /* XXX: Should check for IPv4 flows */ - if (IS_IP6_FLOW_ID(&(q[l].id))) - continue; - - if (!index_printed) { - index_printed = 1; - if (indexes > 0) /* currently a no-op */ - printf("\n"); - indexes++; - printf(" " - "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", - fs->flow_mask.proto, - fs->flow_mask.src_ip, fs->flow_mask.src_port, - fs->flow_mask.dst_ip, fs->flow_mask.dst_port); - - printf("BKT Prot ___Source IP/port____ " - "____Dest. IP/port____ " - "Tot_pkt/bytes Pkt/Byte Drp\n"); - } - - printf("%3d ", q[l].hash_slot); - pe = getprotobynumber(q[l].id.proto); + printf("%3u%c", (ni->oid.id) & 0xff, + id->extra ? 
'*' : ' '); + if (!IS_IP6_FLOW_ID(id)) { if (pe) printf("%-4s ", pe->p_name); else - printf("%4u ", q[l].id.proto); - ina.s_addr = htonl(q[l].id.src_ip); + printf("%4u ", id->proto); + ina.s_addr = htonl(id->src_ip); printf("%15s/%-5d ", - inet_ntoa(ina), q[l].id.src_port); - ina.s_addr = htonl(q[l].id.dst_ip); + inet_ntoa(ina), id->src_port); + ina.s_addr = htonl(id->dst_ip); printf("%15s/%-5d ", - inet_ntoa(ina), q[l].id.dst_port); - printf("%4llu %8llu %2u %4u %3u\n", - align_uint64(&q[l].tot_pkts), - align_uint64(&q[l].tot_bytes), - q[l].len, q[l].len_bytes, q[l].drops); - if (co.verbose) - printf(" S %20llu F %20llu\n", - align_uint64(&q[l].S), align_uint64(&q[l].F)); - } - - /* Print IPv6 flows */ - index_printed = 0; - for (l = 0; l < fs->rq_elements; l++) { - if (!IS_IP6_FLOW_ID(&(q[l].id))) - continue; - - if (!index_printed) { - index_printed = 1; - if (indexes > 0) - printf("\n"); - indexes++; - printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ", - fs->flow_mask.proto, fs->flow_mask.flow_id6); - inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6), - buff, sizeof(buff)); - printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port); - inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6), - buff, sizeof(buff) ); - printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port); - - printf("BKT ___Prot___ _flow-id_ " - "______________Source IPv6/port_______________ " - "_______________Dest. 
IPv6/port_______________ " - "Tot_pkt/bytes Pkt/Byte Drp\n"); - } - printf("%3d ", q[l].hash_slot); - pe = getprotobynumber(q[l].id.proto); + inet_ntoa(ina), id->dst_port); + } else { + /* Print IPv6 flows */ if (pe != NULL) printf("%9s ", pe->p_name); else - printf("%9u ", q[l].id.proto); - printf("%7d %39s/%-5d ", q[l].id.flow_id6, - inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)), - q[l].id.src_port); + printf("%9u ", id->proto); + printf("%7d %39s/%-5d ", id->flow_id6, + inet_ntop(AF_INET6, &(id->src_ip6), buff, sizeof(buff)), + id->src_port); printf(" %39s/%-5d ", - inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)), - q[l].id.dst_port); - printf(" %4llu %8llu %2u %4u %3u\n", - align_uint64(&q[l].tot_pkts), - align_uint64(&q[l].tot_bytes), - q[l].len, q[l].len_bytes, q[l].drops); - if (co.verbose) - printf(" S %20llu F %20llu\n", - align_uint64(&q[l].S), - align_uint64(&q[l].F)); + inet_ntop(AF_INET6, &(id->dst_ip6), buff, sizeof(buff)), + id->dst_port); } + printf("%4llu %8llu %2u %4u %3u\n", + align_uint64(&ni->tot_pkts), + align_uint64(&ni->tot_bytes), + ni->length, ni->len_bytes, ni->drops); } static void -print_flowset_parms(struct dn_flow_set *fs, char *prefix) +print_flowset_parms(struct dn_fs *fs, char *prefix) { int l; char qs[30]; @@ -226,7 +218,7 @@ print_flowset_parms(struct dn_flow_set *fs, char *prefix) char red[90]; /* Display RED parameters */ l = fs->qsize; - if (fs->flags_fs & DN_QSIZE_IS_BYTES) { + if (fs->flags & DN_QSIZE_BYTES) { if (l >= 8192) sprintf(qs, "%d KB", l / 1024); else @@ -237,23 +229,34 @@ print_flowset_parms(struct dn_flow_set *fs, char *prefix) sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff)); else plr[0] = '\0'; - if (fs->flags_fs & DN_IS_RED) /* RED parameters */ + + if (fs->flags & DN_IS_RED) /* RED parameters */ sprintf(red, "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", - (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ', + (fs->flags & DN_IS_GENTLE_RED) ? 
'G' : ' ', 1.0 * fs->w_q / (double)(1 << SCALE_RED), - SCALE_VAL(fs->min_th), - SCALE_VAL(fs->max_th), + fs->min_th, + fs->max_th, 1.0 * fs->max_p / (double)(1 << SCALE_RED)); else sprintf(red, "droptail"); - printf("%s %s%s %d queues (%d buckets) %s\n", - prefix, qs, plr, fs->rq_elements, fs->rq_size, red); + if (prefix[0]) { + printf("%s %s%s %d queues (%d buckets) %s\n", + prefix, qs, plr, fs->oid.id, fs->buckets, red); + prefix[0] = '\0'; + } else { + printf("q%05d %s%s %d flows (%d buckets) sched %d " + "weight %d lmax %d pri %d %s\n", + fs->fs_nr, qs, plr, fs->oid.id, fs->buckets, + fs->sched_nr, fs->par[0], fs->par[1], fs->par[2], red); + if (fs->flags & DN_HAVE_MASK) + print_mask(&fs->flow_mask); + } } static void -print_extra_delay_parms(struct dn_pipe *p) +print_extra_delay_parms(struct dn_profile *p) { double loss; if (p->samples_no <= 0) @@ -265,105 +268,126 @@ print_extra_delay_parms(struct dn_pipe *p) p->name, loss, p->samples_no); } -void -ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]) +static void +flush_buf(char *buf) { - int rulenum; - void *next = data; - struct dn_pipe *p = (struct dn_pipe *) data; - struct dn_flow_set *fs; - struct dn_flow_queue *q; - int l; - - if (ac > 0) - rulenum = strtoul(*av++, NULL, 10); - else - rulenum = 0; - for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) { - double b = p->bandwidth; - char buf[30]; - char prefix[80]; - char burst[5 + 7]; - - if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE) - break; /* done with pipes, now queues */ - - /* - * compute length, as pipe have variable size - */ - l = sizeof(*p) + p->fs.rq_elements * sizeof(*q); - next = (char *)p + l; - nbytes -= l; - - if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2) - continue; - - /* - * Print rate (or clocking interface) - */ - if (p->if_name[0] != '\0') - sprintf(buf, "%s", p->if_name); - else if (b == 0) - sprintf(buf, "unlimited"); - else if (b >= 1000000) - sprintf(buf, "%7.3f Mbit/s", b/1000000); - 
else if (b >= 1000) - sprintf(buf, "%7.3f Kbit/s", b/1000); - else - sprintf(buf, "%7.3f bit/s ", b); - - sprintf(prefix, "%05d: %s %4d ms ", - p->pipe_nr, buf, p->delay); - - print_flowset_parms(&(p->fs), prefix); - - if (humanize_number(burst, sizeof(burst), p->burst, - "Byte", HN_AUTOSCALE, 0) < 0 || co.verbose) - printf("\t burst: %ju Byte\n", p->burst); - else - printf("\t burst: %s\n", burst); - - print_extra_delay_parms(p); - - q = (struct dn_flow_queue *)(p+1); - list_queues(&(p->fs), q); - } - for (fs = next; nbytes >= sizeof *fs; fs = next) { - char prefix[80]; - - if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE) - break; - l = sizeof(*fs) + fs->rq_elements * sizeof(*q); - next = (char *)fs + l; - nbytes -= l; - - if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) || - (rulenum != fs->parent_nr && co.do_pipe == 1))) { - continue; - } - - q = (struct dn_flow_queue *)(fs+1); - sprintf(prefix, "q%05d: weight %d pipe %d ", - fs->fs_nr, fs->weight, fs->parent_nr); - print_flowset_parms(fs, prefix); - list_queues(fs, q); + if (buf[0]) + printf("%s\n", buf); + buf[0] = '\0'; +} + +/* + * generic list routine. We expect objects in a specific order, i.e. + * PIPES AND SCHEDULERS: + * link; scheduler; internal flowset if any; instances + * we can tell a pipe from the number. 
+ * + * FLOWSETS: + * flowset; queues; + * link i (int queue); scheduler i; si(i) { flowsets() : queues } + */ +static void +list_pipes(struct dn_id *oid, struct dn_id *end) +{ + char buf[160]; /* pending buffer */ + buf[0] = '\0'; + + for (; oid != end; oid = O_NEXT(oid, oid->len)) { + if (oid->len < sizeof(*oid)) + errx(1, "invalid oid len %d\n", oid->len); + + switch (oid->type) { + default: + flush_buf(buf); + printf("unrecognized object %d size %d\n", oid->type, oid->len); + break; + case DN_TEXT: /* list of attached flowsets */ + { + int i, l; + struct { + struct dn_id id; + uint32_t p[0]; + } *d = (void *)oid; + l = (oid->len - sizeof(*oid))/sizeof(d->p[0]); + if (l == 0) + break; + printf(" Children flowsets: "); + for (i = 0; i < l; i++) + printf("%u ", d->p[i]); + printf("\n"); + break; + } + case DN_CMD_GET: + if (co.verbose) + printf("answer for cmd %d, len %d\n", oid->type, oid->id); + break; + case DN_SCH: { + struct dn_sch *s = (struct dn_sch *)oid; + flush_buf(buf); + printf(" sched %d type %s flags 0x%x %d buckets %d active\n", + s->sched_nr, + s->name, s->flags, s->buckets, s->oid.id); + if (s->flags & DN_HAVE_MASK) + print_mask(&s->sched_mask); + } + break; + + case DN_FLOW: + list_flow((struct dn_flow *)oid); + break; + + case DN_LINK: { + struct dn_link *p = (struct dn_link *)oid; + double b = p->bandwidth; + char bwbuf[30]; + char burst[5 + 7]; + + /* This starts a new object so flush buffer */ + flush_buf(buf); + /* data rate */ + if (b == 0) + sprintf(bwbuf, "unlimited "); + else if (b >= 1000000) + sprintf(bwbuf, "%7.3f Mbit/s", b/1000000); + else if (b >= 1000) + sprintf(bwbuf, "%7.3f Kbit/s", b/1000); + else + sprintf(bwbuf, "%7.3f bit/s ", b); + + if (humanize_number(burst, sizeof(burst), p->burst, + "", HN_AUTOSCALE, 0) < 0 || co.verbose) + sprintf(burst, "%d", (int)p->burst); + sprintf(buf, "%05d: %s %4d ms burst %s", + p->link_nr % DN_MAX_ID, bwbuf, p->delay, burst); + } + break; + + case DN_FS: + print_flowset_parms((struct dn_fs 
*)oid, buf); + break; + case DN_PROFILE: + flush_buf(buf); + print_extra_delay_parms((struct dn_profile *)oid); } + flush_buf(buf); // XXX does it really go here ? + } } /* - * Delete pipe or queue i + * Delete pipe, queue or scheduler i */ int -ipfw_delete_pipe(int pipe_or_queue, int i) +ipfw_delete_pipe(int do_pipe, int i) { - struct dn_pipe p; - - memset(&p, 0, sizeof p); - if (pipe_or_queue == 1) - p.pipe_nr = i; /* pipe */ - else - p.fs.fs_nr = i; /* queue */ - i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p); + struct { + struct dn_id oid; + uintptr_t a[1]; /* add more if we want a list */ + } cmd; + oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION); + cmd.oid.subtype = (do_pipe == 1) ? DN_LINK : + ( (do_pipe == 2) ? DN_FS : DN_SCH); + cmd.a[0] = i; + i = do_cmd(IP_DUMMYNET3, &cmd, cmd.oid.len); if (i) { i = 1; warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i); @@ -400,7 +424,7 @@ ipfw_delete_pipe(int pipe_or_queue, int i) * The empirical curve may have both vertical and horizontal lines. * Vertical lines represent constant delay for a range of * probabilities; horizontal lines correspond to a discontinuty - * in the delay distribution: the pipe will use the largest delay + * in the delay distribution: the link will use the largest delay * for a given probability. 
* * To pass the curve to dummynet, we must store the parameters @@ -490,9 +514,12 @@ static void read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) { if (*bandwidth != -1) - warn("duplicate token, override bandwidth value!"); + warnx("duplicate token, override bandwidth value!"); if (arg[0] >= 'a' && arg[0] <= 'z') { + if (!if_name) { + errx(1, "no if support"); + } if (namelen >= IFNAMSIZ) warn("interface name truncated"); namelen--; @@ -508,7 +535,7 @@ read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) if (*end == 'K' || *end == 'k') { end++; bw *= 1000; - } else if (*end == 'M') { + } else if (*end == 'M' || *end == 'm') { end++; bw *= 1000000; } @@ -521,7 +548,8 @@ read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen) errx(EX_DATAERR, "bandwidth too large"); *bandwidth = bw; - if_name[0] = '\0'; + if (if_name) + if_name[0] = '\0'; } } @@ -551,7 +579,8 @@ compare_points(const void *vp1, const void *vp2) #define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno static void -load_extra_delays(const char *filename, struct dn_pipe *p) +load_extra_delays(const char *filename, struct dn_profile *p, + struct dn_link *link) { char line[ED_MAX_LINE_LEN]; FILE *f; @@ -566,6 +595,9 @@ load_extra_delays(const char *filename, struct dn_pipe *p) struct point points[ED_MAX_SAMPLES_NO]; int points_no = 0; + /* XXX link never NULL? 
*/ + p->link_nr = link->link_nr; + profile_name[0] = '\0'; f = fopen(filename, "r"); if (f == NULL) @@ -606,7 +638,8 @@ load_extra_delays(const char *filename, struct dn_pipe *p) ED_MAX_SAMPLES_NO); do_points = 0; } else if (!strcasecmp(name, ED_TOK_BW)) { - read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name)); + char buf[IFNAMSIZ]; + read_bandwidth(arg, &link->bandwidth, buf, sizeof(buf)); } else if (!strcasecmp(name, ED_TOK_LOSS)) { if (loss != -1.0) errx(ED_EFMT("duplicated token: %s"), name); @@ -676,17 +709,17 @@ load_extra_delays(const char *filename, struct dn_pipe *p) double y2 = points[i+1].prob * samples; double x2 = points[i+1].delay; - int index = y1; + int ix = y1; int stop = y2; if (x1 == x2) { - for (; indexsamples[index] = x1; + for (; ixsamples[ix] = x1; } else { double m = (y2-y1)/(x2-x1); double c = y1 - m*x1; - for (; indexsamples[index] = (index - c)/m; + for (; ixsamples[ix] = (ix - c)/m; } } p->samples_no = samples; @@ -694,27 +727,120 @@ load_extra_delays(const char *filename, struct dn_pipe *p) strncpy(p->name, profile_name, sizeof(p->name)); } +/* + * configuration of pipes, schedulers, flowsets. + * When we configure a new scheduler, an empty pipe is created, so: + * + * do_pipe = 1 -> "pipe N config ..." only for backward compatibility + * sched N+Delta type fifo sched_mask ... + * pipe N+Delta + * flowset N+Delta pipe N+Delta (no parameters) + * sched N type wf2q+ sched_mask ... + * pipe N + * + * do_pipe = 2 -> flowset N config + * flowset N parameters + * + * do_pipe = 3 -> sched N config + * sched N parameters (default no pipe) + * optional Pipe N config ... 
+ * pipe ==> + */ void ipfw_config_pipe(int ac, char **av) { - int samples[ED_MAX_SAMPLES_NO]; - struct dn_pipe p; - int i; + int i, j; char *end; void *par = NULL; - - memset(&p, 0, sizeof p); - p.bandwidth = -1; + struct dn_id *buf, *base; + struct dn_sch *sch = NULL; + struct dn_link *p = NULL; + struct dn_fs *fs = NULL; + struct dn_profile *pf = NULL; + struct ipfw_flow_id *mask = NULL; + int lmax; + uint32_t _foo = 0, *flags = &_foo , *buckets = &_foo; + + /* + * allocate space for 1 header, + * 1 scheduler, 1 link, 1 flowset, 1 profile + */ + lmax = sizeof(struct dn_id); /* command header */ + lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) + + sizeof(struct dn_fs) + sizeof(struct dn_profile); av++; ac--; /* Pipe number */ if (ac && isdigit(**av)) { i = atoi(*av); av++; ac--; - if (co.do_pipe == 1) - p.pipe_nr = i; - else - p.fs.fs_nr = i; + } else + i = -1; + if (i <= 0) + errx(EX_USAGE, "need a pipe/flowset/sched number"); + base = buf = safe_calloc(1, lmax); + /* all commands start with a 'CONFIGURE' and a version */ + o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG); + base->id = DN_API_VERSION; + + switch (co.do_pipe) { + case 1: /* "pipe N config ..." */ + /* Allocate space for the WF2Q+ scheduler, its link + * and the FIFO flowset. Set the number, but leave + * the scheduler subtype and other parameters to 0 + * so the kernel will use appropriate defaults. + * XXX todo: add a flag to record if a parameter + * is actually configured. + * If we do a 'pipe config' mask -> sched_mask. + * The FIFO scheduler and link are derived from the + * WF2Q+ one in the kernel. 
+ */ + sch = o_next(&buf, sizeof(*sch), DN_SCH); + p = o_next(&buf, sizeof(*p), DN_LINK); + fs = o_next(&buf, sizeof(*fs), DN_FS); + + sch->sched_nr = i; + sch->oid.subtype = 0; /* defaults to WF2Q+ */ + mask = &sch->sched_mask; + flags = &sch->flags; + buckets = &sch->buckets; + *flags |= DN_PIPE_CMD; + + p->link_nr = i; + + /* This flowset is only for the FIFO scheduler */ + fs->fs_nr = i + 2*DN_MAX_ID; + fs->sched_nr = i + DN_MAX_ID; + break; + + case 2: /* "queue N config ... " */ + fs = o_next(&buf, sizeof(*fs), DN_FS); + fs->fs_nr = i; + mask = &fs->flow_mask; + flags = &fs->flags; + buckets = &fs->buckets; + break; + + case 3: /* "sched N config ..." */ + sch = o_next(&buf, sizeof(*sch), DN_SCH); + fs = o_next(&buf, sizeof(*fs), DN_FS); + sch->sched_nr = i; + mask = &sch->sched_mask; + flags = &sch->flags; + buckets = &sch->buckets; + /* fs is used only with !MULTIQUEUE schedulers */ + fs->fs_nr = i + DN_MAX_ID; + fs->sched_nr = i; + break; } + /* set to -1 those fields for which we want to reuse existing + * values from the kernel. + * Also, *_nr and subtype = 0 mean reuse the value from the kernel. + * XXX todo: support reuse of the mask. 
+ */ + if (p) + p->bandwidth = -1; + for (j = 0; j < sizeof(fs->par)/sizeof(fs->par[0]); j++) + fs->par[j] = -1; while (ac > 0) { double d; int tok = match_token(dummynet_params, *av); @@ -722,41 +848,48 @@ ipfw_config_pipe(int ac, char **av) switch(tok) { case TOK_NOERROR: - p.fs.flags_fs |= DN_NOERROR; + NEED(fs, "noerror is only for pipes"); + fs->flags |= DN_NOERROR; break; case TOK_PLR: + NEED(fs, "plr is only for pipes"); NEED1("plr needs argument 0..1\n"); d = strtod(av[0], NULL); if (d > 1) d = 1; else if (d < 0) d = 0; - p.fs.plr = (int)(d*0x7fffffff); + fs->plr = (int)(d*0x7fffffff); ac--; av++; break; case TOK_QUEUE: + NEED(fs, "queue is only for pipes or flowsets"); NEED1("queue needs queue size\n"); end = NULL; - p.fs.qsize = strtoul(av[0], &end, 0); + fs->qsize = strtoul(av[0], &end, 0); if (*end == 'K' || *end == 'k') { - p.fs.flags_fs |= DN_QSIZE_IS_BYTES; - p.fs.qsize *= 1024; + fs->flags |= DN_QSIZE_BYTES; + fs->qsize *= 1024; } else if (*end == 'B' || _substrcmp2(end, "by", "bytes") == 0) { - p.fs.flags_fs |= DN_QSIZE_IS_BYTES; + fs->flags |= DN_QSIZE_BYTES; } ac--; av++; break; case TOK_BUCKETS: + NEED(fs, "buckets is only for pipes or flowsets"); NEED1("buckets needs argument\n"); - p.fs.rq_size = strtoul(av[0], NULL, 0); + *buckets = strtoul(av[0], NULL, 0); ac--; av++; break; + case TOK_FLOW_MASK: + case TOK_SCHED_MASK: case TOK_MASK: + NEED(mask, "tok_mask"); NEED1("mask needs mask specifier\n"); /* * per-flow queue, mask is dst_ip, dst_port, @@ -764,7 +897,7 @@ ipfw_config_pipe(int ac, char **av) */ par = NULL; - bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask)); + bzero(mask, sizeof(*mask)); end = NULL; while (ac >= 1) { @@ -780,44 +913,55 @@ ipfw_config_pipe(int ac, char **av) case TOK_ALL: /* * special case, all bits significant + * except 'extra' (the queue number) */ - p.fs.flow_mask.dst_ip = ~0; - p.fs.flow_mask.src_ip = ~0; - p.fs.flow_mask.dst_port = ~0; - p.fs.flow_mask.src_port = ~0; - p.fs.flow_mask.proto = ~0; - 
n2mask(&(p.fs.flow_mask.dst_ip6), 128); - n2mask(&(p.fs.flow_mask.src_ip6), 128); - p.fs.flow_mask.flow_id6 = ~0; - p.fs.flags_fs |= DN_HAVE_FLOW_MASK; + mask->dst_ip = ~0; + mask->src_ip = ~0; + mask->dst_port = ~0; + mask->src_port = ~0; + mask->proto = ~0; + n2mask(&mask->dst_ip6, 128); + n2mask(&mask->src_ip6, 128); + mask->flow_id6 = ~0; + *flags |= DN_HAVE_MASK; + goto end_mask; + + case TOK_QUEUE: + mask->extra = ~0; + *flags |= DN_HAVE_MASK; goto end_mask; case TOK_DSTIP: - p32 = &p.fs.flow_mask.dst_ip; + mask->addr_type = 4; + p32 = &mask->dst_ip; break; case TOK_SRCIP: - p32 = &p.fs.flow_mask.src_ip; + mask->addr_type = 4; + p32 = &mask->src_ip; break; case TOK_DSTIP6: - pa6 = &(p.fs.flow_mask.dst_ip6); + mask->addr_type = 6; + pa6 = &mask->dst_ip6; break; case TOK_SRCIP6: - pa6 = &(p.fs.flow_mask.src_ip6); + mask->addr_type = 6; + pa6 = &mask->src_ip6; break; case TOK_FLOWID: - p20 = &p.fs.flow_mask.flow_id6; + mask->addr_type = 6; + p20 = &mask->flow_id6; break; case TOK_DSTPORT: - p16 = &p.fs.flow_mask.dst_port; + p16 = &mask->dst_port; break; case TOK_SRCPORT: - p16 = &p.fs.flow_mask.src_port; + p16 = &mask->src_port; break; case TOK_PROTO: @@ -857,10 +1001,10 @@ ipfw_config_pipe(int ac, char **av) if (a > 0xFF) errx(EX_DATAERR, "proto mask must be 8 bit"); - p.fs.flow_mask.proto = (uint8_t)a; + mask->proto = (uint8_t)a; } if (a != 0) - p.fs.flags_fs |= DN_HAVE_FLOW_MASK; + *flags |= DN_HAVE_MASK; ac--; av++; } /* end while, config masks */ end_mask: @@ -869,9 +1013,9 @@ end_mask: case TOK_RED: case TOK_GRED: NEED1("red/gred needs w_q/min_th/max_th/max_p\n"); - p.fs.flags_fs |= DN_IS_RED; + fs->flags |= DN_IS_RED; if (tok == TOK_GRED) - p.fs.flags_fs |= DN_IS_GENTLE_RED; + fs->flags |= DN_IS_GENTLE_RED; /* * the format for parameters is w_q/min_th/max_th/max_p */ @@ -879,82 +1023,108 @@ end_mask: double w_q = strtod(end, NULL); if (w_q > 1 || w_q <= 0) errx(EX_DATAERR, "0 < w_q <= 1"); - p.fs.w_q = (int) (w_q * (1 << SCALE_RED)); + fs->w_q = (int) 
(w_q * (1 << SCALE_RED)); } if ((end = strsep(&av[0], "/"))) { - p.fs.min_th = strtoul(end, &end, 0); + fs->min_th = strtoul(end, &end, 0); if (*end == 'K' || *end == 'k') - p.fs.min_th *= 1024; + fs->min_th *= 1024; } if ((end = strsep(&av[0], "/"))) { - p.fs.max_th = strtoul(end, &end, 0); + fs->max_th = strtoul(end, &end, 0); if (*end == 'K' || *end == 'k') - p.fs.max_th *= 1024; + fs->max_th *= 1024; } if ((end = strsep(&av[0], "/"))) { double max_p = strtod(end, NULL); if (max_p > 1 || max_p <= 0) errx(EX_DATAERR, "0 < max_p <= 1"); - p.fs.max_p = (int)(max_p * (1 << SCALE_RED)); + fs->max_p = (int)(max_p * (1 << SCALE_RED)); } ac--; av++; break; case TOK_DROPTAIL: - p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED); + NEED(fs, "droptail is only for flowsets"); + fs->flags &= ~(DN_IS_RED|DN_IS_GENTLE_RED); break; case TOK_BW: + NEED(p, "bw is only for links"); NEED1("bw needs bandwidth or interface\n"); - if (co.do_pipe != 1) - errx(EX_DATAERR, "bandwidth only valid for pipes"); - read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name)); + read_bandwidth(av[0], &p->bandwidth, NULL, 0); ac--; av++; break; case TOK_DELAY: - if (co.do_pipe != 1) - errx(EX_DATAERR, "delay only valid for pipes"); + NEED(p, "delay is only for links"); NEED1("delay needs argument 0..10000ms\n"); - p.delay = strtoul(av[0], NULL, 0); + p->delay = strtoul(av[0], NULL, 0); + ac--; av++; + break; + + case TOK_TYPE: { + int l; + NEED(sch, "type is only for schedulers"); + NEED1("type needs a string"); + l = strlen(av[0]); + if (l == 0 || l > 15) + errx(1, "type %s too long\n", av[0]); + strcpy(sch->name, av[0]); + sch->oid.subtype = 0; /* use string */ ac--; av++; break; + } case TOK_WEIGHT: - if (co.do_pipe == 1) - errx(EX_DATAERR,"weight only valid for queues"); - NEED1("weight needs argument 0..100\n"); - p.fs.weight = strtoul(av[0], &end, 0); + NEED(fs, "weight is only for flowsets"); + NEED1("weight needs argument\n"); + fs->par[0] = strtol(av[0], &end, 0); + ac--; av++; + 
break; + + case TOK_LMAX: + NEED(fs, "lmax is only for flowsets"); + NEED1("lmax needs argument\n"); + fs->par[1] = strtol(av[0], &end, 0); ac--; av++; break; + case TOK_PRI: + NEED(fs, "priority is only for flowsets"); + NEED1("priority needs argument\n"); + fs->par[2] = strtol(av[0], &end, 0); + ac--; av++; + break; + + case TOK_SCHED: case TOK_PIPE: - if (co.do_pipe == 1) - errx(EX_DATAERR,"pipe only valid for queues"); - NEED1("pipe needs pipe_number\n"); - p.fs.parent_nr = strtoul(av[0], &end, 0); + NEED(fs, "pipe/sched"); + NEED1("pipe/link/sched needs number\n"); + fs->sched_nr = strtoul(av[0], &end, 0); ac--; av++; break; - case TOK_PIPE_PROFILE: - if (co.do_pipe != 1) - errx(EX_DATAERR, "extra delay only valid for pipes"); + case TOK_PROFILE: + NEED((!pf), "profile already set"); + NEED(p, "profile"); + { NEED1("extra delay needs the file name\n"); - p.samples = &samples[0]; - load_extra_delays(av[0], &p); + pf = o_next(&buf, sizeof(*pf), DN_PROFILE); + load_extra_delays(av[0], pf, p); //XXX can't fail? 
--ac; ++av; + } break; case TOK_BURST: - if (co.do_pipe != 1) - errx(EX_DATAERR, "burst only valid for pipes"); + NEED(p, "burst"); NEED1("burst needs argument\n"); errno = 0; - if (expand_number(av[0], &p.burst) < 0) + if (expand_number(av[0], (int64_t *)&p->burst) < 0) if (errno != ERANGE) errx(EX_DATAERR, "burst: invalid argument"); - if (errno || p.burst > (1ULL << 48) - 1) + if (errno || p->burst > (1ULL << 48) - 1) errx(EX_DATAERR, "burst: out of range (0..2^48-1)"); ac--; av++; @@ -964,26 +1134,17 @@ end_mask: errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]); } } - if (co.do_pipe == 1) { - if (p.pipe_nr == 0) - errx(EX_DATAERR, "pipe_nr must be > 0"); - if (p.delay > 10000) - errx(EX_DATAERR, "delay must be < 10000"); - } else { /* co.do_pipe == 2, queue */ - if (p.fs.parent_nr == 0) - errx(EX_DATAERR, "pipe must be > 0"); - if (p.fs.weight >100) - errx(EX_DATAERR, "weight must be <= 100"); - } - /* check for bandwidth value */ - if (p.bandwidth == -1) { - p.bandwidth = 0; - if (p.samples_no > 0) - errx(EX_DATAERR, "profile requires a bandwidth limit"); + /* check validity of parameters */ + if (p) { + if (p->delay > 10000) + errx(EX_DATAERR, "delay must be < 10000"); + if (p->bandwidth == -1) + p->bandwidth = 0; } - - if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) { + if (fs) { + /* XXX accept a 0 scheduler to keep the default */ + if (fs->flags & DN_QSIZE_BYTES) { size_t len; long limit; @@ -991,9 +1152,9 @@ end_mask: if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit", &limit, &len, NULL, 0) == -1) limit = 1024*1024; - if (p.fs.qsize > limit) + if (fs->qsize > limit) errx(EX_DATAERR, "queue size must be < %ldB", limit); - } else { + } else { size_t len; long limit; @@ -1001,27 +1162,25 @@ end_mask: if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit", &limit, &len, NULL, 0) == -1) limit = 100; - if (p.fs.qsize > limit) + if (fs->qsize > limit) errx(EX_DATAERR, "2 <= queue size <= %ld", limit); - } - if (p.fs.flags_fs & DN_IS_RED) { + } + + if 
(fs->flags & DN_IS_RED) { size_t len; int lookup_depth, avg_pkt_size; - double s, idle, weight, w_q; - struct clockinfo ck; - int t; + double w_q; - if (p.fs.min_th >= p.fs.max_th) + if (fs->min_th >= fs->max_th) errx(EX_DATAERR, "min_th %d must be < than max_th %d", - p.fs.min_th, p.fs.max_th); - if (p.fs.max_th == 0) + fs->min_th, fs->max_th); + if (fs->max_th == 0) errx(EX_DATAERR, "max_th must be > 0"); len = sizeof(int); if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth", &lookup_depth, &len, NULL, 0) == -1) - errx(1, "sysctlbyname(\"%s\")", - "net.inet.ip.dummynet.red_lookup_depth"); + lookup_depth = 256; if (lookup_depth == 0) errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth" " must be greater than zero"); @@ -1029,18 +1188,13 @@ end_mask: len = sizeof(int); if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size", &avg_pkt_size, &len, NULL, 0) == -1) + avg_pkt_size = 512; - errx(1, "sysctlbyname(\"%s\")", - "net.inet.ip.dummynet.red_avg_pkt_size"); if (avg_pkt_size == 0) errx(EX_DATAERR, "net.inet.ip.dummynet.red_avg_pkt_size must" " be greater than zero"); - len = sizeof(struct clockinfo); - if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1) - errx(1, "sysctlbyname(\"%s\")", "kern.clockrate"); - /* * Ticks needed for sending a medium-sized packet. * Unfortunately, when we are configuring a WF2Q+ queue, we @@ -1050,38 +1204,181 @@ end_mask: * correct. But on the other hand, why do we want RED with * WF2Q+ ? */ +#if 0 if (p.bandwidth==0) /* this is a WF2Q+ queue */ s = 0; else s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth; - +#endif /* * max idle time (in ticks) before avg queue size becomes 0. * NOTA: (3/w_q) is approx the value x so that * (1-w_q)^x < 10^-3. */ - w_q = ((double)p.fs.w_q) / (1 << SCALE_RED); + w_q = ((double)fs->w_q) / (1 << SCALE_RED); +#if 0 // go in kernel idle = s * 3. 
/ w_q; - p.fs.lookup_step = (int)idle / lookup_depth; - if (!p.fs.lookup_step) - p.fs.lookup_step = 1; + fs->lookup_step = (int)idle / lookup_depth; + if (!fs->lookup_step) + fs->lookup_step = 1; weight = 1 - w_q; - for (t = p.fs.lookup_step; t > 1; --t) + for (t = fs->lookup_step; t > 1; --t) weight *= 1 - w_q; - p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED)); + fs->lookup_weight = (int)(weight * (1 << SCALE_RED)); +#endif + } } - if (p.samples_no <= 0) { - i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p); - } else { - struct dn_pipe_max pm; - int len = sizeof(pm); - memcpy(&pm.pipe, &p, sizeof(pm.pipe)); - memcpy(&pm.samples, samples, sizeof(pm.samples)); - - i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len); - } + i = do_cmd(IP_DUMMYNET3, base, (char *)buf - (char *)base); if (i) err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE"); } + +void +dummynet_flush(void) +{ + struct dn_id oid; + oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION); + do_cmd(IP_DUMMYNET3, &oid, oid.len); +} + +/* Parse input for 'ipfw [pipe|sched|queue] show [range list]' + * Returns the number of ranges, and possibly stores them + * in the array v of size len. + */ +static int +parse_range(int ac, char *av[], uint32_t *v, int len) +{ + int n = 0; + char *endptr, *s; + uint32_t base[2]; + + if (v == NULL || len < 2) { + v = base; + len = 2; + } + + for (s = *av; s != NULL; av++, ac--) { + v[0] = strtoul(s, &endptr, 10); + v[1] = (*endptr != '-') ? v[0] : + strtoul(endptr+1, &endptr, 10); + if (*endptr == '\0') { /* prepare for next round */ + s = (ac > 0) ? 
*(av+1) : NULL; + } else { + if (*endptr != ',') { + warn("invalid number: %s", s); + s = ++endptr; + continue; + } + /* continue processing from here */ + s = ++endptr; + ac++; + av--; + } + if (v[1] < v[0] || + v[1] < 0 || v[1] >= DN_MAX_ID-1 || + v[0] < 0 || v[1] >= DN_MAX_ID-1) { + continue; /* invalid entry */ + } + n++; + /* translate if 'pipe list' */ + if (co.do_pipe == 1) { + v[0] += DN_MAX_ID; + v[1] += DN_MAX_ID; + } + v = (n*2 < len) ? v + 2 : base; + } + return n; +} + +/* main entry point for dummynet list functions. co.do_pipe indicates + * which function we want to support. + * av may contain filtering arguments, either individual entries + * or ranges, or lists (space or commas are valid separators). + * Format for a range can be n1-n2 or n3 n4 n5 ... + * In a range n1 must be <= n2, otherwise the range is ignored. + * A number 'n4' is translate in a range 'n4-n4' + * All number must be > 0 and < DN_MAX_ID-1 + */ +void +dummynet_list(int ac, char *av[], int show_counters) +{ + struct dn_id *oid, *x = NULL; + int ret, i, l; + int n; /* # of ranges */ + int buflen; + int max_size; /* largest obj passed up */ + + ac--; + av++; /* skip 'list' | 'show' word */ + + n = parse_range(ac, av, NULL, 0); /* Count # of ranges. */ + + /* Allocate space to store ranges */ + l = sizeof(*oid) + sizeof(uint32_t) * n * 2; + oid = safe_calloc(1, l); + oid_fill(oid, l, DN_CMD_GET, DN_API_VERSION); + + if (n > 0) /* store ranges in idx */ + parse_range(ac, av, (uint32_t *)(oid + 1), n*2); + /* + * Compute the size of the largest object returned. If the + * response leaves at least this much spare space in the + * buffer, then surely the response is complete; otherwise + * there might be a risk of truncation and we will need to + * retry with a larger buffer. + * XXX don't bother with smaller structs. 
+ */ + max_size = sizeof(struct dn_fs); + if (max_size < sizeof(struct dn_sch)) + max_size = sizeof(struct dn_sch); + if (max_size < sizeof(struct dn_flow)) + max_size = sizeof(struct dn_flow); + + switch (co.do_pipe) { + case 1: + oid->subtype = DN_LINK; /* list pipe */ + break; + case 2: + oid->subtype = DN_FS; /* list queue */ + break; + case 3: + oid->subtype = DN_SCH; /* list sched */ + break; + } + + /* + * Ask the kernel an estimate of the required space (result + * in oid.id), unless we are requesting a subset of objects, + * in which case the kernel does not give an exact answer. + * In any case, space might grow in the meantime due to the + * creation of new queues, so we must be prepared to retry. + */ + if (n > 0) { + buflen = 4*1024; + } else { + ret = do_cmd(-IP_DUMMYNET3, oid, (uintptr_t)&l); + if (ret != 0 || oid->id <= sizeof(*oid)) + goto done; + buflen = oid->id + max_size; + oid->len = sizeof(*oid); /* restore */ + } + /* Try a few times, until the buffer fits */ + for (i = 0; i < 20; i++) { + l = buflen; + x = safe_realloc(x, l); + bcopy(oid, x, oid->len); + ret = do_cmd(-IP_DUMMYNET3, x, (uintptr_t)&l); + if (ret != 0 || x->id <= sizeof(*oid)) + goto done; /* no response */ + if (l + max_size <= buflen) + break; /* ok */ + buflen *= 2; /* double for next attempt */ + } + list_pipes(x, O_NEXT(x, l)); +done: + if (x) + free(x); + free(oid); +} diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8 index f8b0746..897cd3f 100644 --- a/sbin/ipfw/ipfw.8 +++ b/sbin/ipfw/ipfw.8 @@ -6,8 +6,10 @@ .Os .Sh NAME .Nm ipfw -.Nd IP firewall and traffic shaper control program +.Nd User interface for firewall, traffic shaper, packet scheduler, +in-kernel NAT. .Sh SYNOPSIS +.Ss FIREWALL CONFIGURATION .Nm .Op Fl cq .Cm add @@ -26,12 +28,6 @@ .Op Cm set Ar N .Brq Cm delete | zero | resetlog .Op Ar number ... 
-.Nm -.Cm enable -.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive -.Nm -.Cm disable -.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive .Pp .Nm .Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ... @@ -43,7 +39,16 @@ .Cm set swap Ar number number .Nm .Cm set show +.Ss SYSCTL SHORTCUTS +.Pp +.Nm +.Cm enable +.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive +.Nm +.Cm disable +.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive .Pp +.Ss LOOKUP TABLES .Nm .Cm table Ar number Cm add Ar addr Ns Oo / Ns Ar masklen Oc Op Ar value .Nm @@ -57,17 +62,19 @@ .Brq Ar number | all .Cm list .Pp +.Ss DUMMYNET CONFIGURATION (TRAFFIC SHAPER AND PACKET SCHEDULER) .Nm -.Brq Cm pipe | queue +.Brq Cm pipe | queue | sched .Ar number .Cm config .Ar config-options .Nm .Op Fl s Op Ar field -.Brq Cm pipe | queue +.Brq Cm pipe | queue | sched .Brq Cm delete | list | show .Op Ar number ... .Pp +.Ss IN-KERNEL NAT .Nm .Op Fl q .Cm nat @@ -89,28 +96,27 @@ The .Nm utility is the user interface for controlling the .Xr ipfw 4 -firewall and the +firewall, the .Xr dummynet 4 -traffic shaper in -.Fx . +traffic shaper/packet scheduler, and the +in-kernel NAT services. .Pp -An -.Nm -configuration, or +A firewall configuration, or .Em ruleset , is made of a list of .Em rules numbered from 1 to 65535. -Packets are passed to -.Nm +Packets are passed to the firewall from a number of different places in the protocol stack (depending on the source and destination of the packet, -it is possible that -.Nm -is invoked multiple times on the same packet). +it is possible for the firewall to be +invoked multiple times on the same packet). The packet passed to the firewall is compared -against each of the rules in the firewall -.Em ruleset . +against each of the rules in the +.Em ruleset , +in rule-number order +(multiple rules with the same number are permitted, in which case +they are processed in order of insertion). 
When a match is found, the action corresponding to the matching rule is performed. .Pp @@ -118,9 +124,7 @@ Depending on the action and certain system settings, packets can be reinjected into the firewall at some rule after the matching one for further processing. .Pp -An -.Nm -ruleset always includes a +A ruleset always includes a .Em default rule (numbered 65535) which cannot be modified or deleted, and matches all packets. @@ -137,14 +141,14 @@ If the ruleset includes one or more rules with the or .Cm limit option, -.Nm -will have a +the firewall will have a .Em stateful -behaviour, i.e., upon a match it will create dynamic rules matching -the exact parameters (source and destination addresses and ports) -of the matching packet. -.Pp -These dynamic rules, which have a limited lifetime, are checked +behaviour, i.e., upon a match it will create +.Em dynamic rules , +i.e. rules that match packets with the same 5-tuple +(protocol, source and destination addresses and ports) +as the packet which caused their creation. +Dynamic rules, which have a limited lifetime, are checked at the first occurrence of a .Cm check-state , .Cm keep-state @@ -283,6 +287,7 @@ When listing, show last match timestamp as seconds from the epoch. This form can be more convenient for postprocessing by scripts. .El .Pp +.Ss LIST OF RULES AND PREPROCESSING To ease configuration, rules can be put into a file which is processed using .Nm @@ -322,14 +327,16 @@ This allows for flexible configuration files (like conditionalizing them on the local hostname) and the use of macros to centralize frequently required arguments like IP addresses. .Pp +.Ss TRAFFIC SHAPER CONFIGURATION The .Nm -.Cm pipe +.Cm pipe , queue and -.Cm queue -commands are used to configure the traffic shaper, as shown in the +.Cm sched +commands are used to configure the traffic shaper and packet scheduler. +See the .Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION -Section below. +Section below for details. 
.Pp If the world and the kernel get out of sync the .Nm @@ -362,7 +369,7 @@ have this picture in mind in order to design a correct ruleset. | to devices | .Ed .Pp -As can be noted from the above picture, the number of +The number of times the same packet goes through the firewall can vary between 0 and 4 depending on packet source and destination, and system configuration. @@ -421,9 +428,9 @@ Keywords are case-sensitive, whereas arguments may or may not be case-sensitive depending on their nature (e.g.\& uid's are, hostnames are not). .Pp -In -.Nm ipfw2 -you can introduce spaces after commas ',' to make +Some arguments (e.g. port or address lists) are comma-separated +lists of values. +In this case, spaces after commas ',' are allowed to make the line more readable. You can also put the entire command (including flags) into a single argument. @@ -434,9 +441,7 @@ ipfw -q add deny src-ip 10.0.0.0/24, 127.0.0.1/8 ipfw "-q add deny src-ip 10.0.0.0/24, 127.0.0.1/8" .Ed .Sh RULE FORMAT -The format of -.Nm -rules is the following: +The format of firewall rules is the following: .Bd -ragged -offset indent .Bk -words .Op Ar rule_number @@ -496,7 +501,7 @@ in future forwarding decisions. .El .Pp Note that some of the above information, e.g.\& source MAC or IP addresses and -TCP/UDP ports, could easily be spoofed, so filtering on those fields +TCP/UDP ports, can be easily spoofed, so filtering on those fields alone might not guarantee the desired results. .Bl -tag -width indent .It Ar rule_number @@ -1002,6 +1007,7 @@ The second format with multiple addresses) is provided for convenience only and its use is discouraged. .It Ar addr : Oo Cm not Oc Bro +.Bl -tag -width indent .Cm any | me | me6 | .Cm table Ns Pq Ar number Ns Op , Ns Ar value .Ar | addr-list | addr-set @@ -1023,6 +1029,7 @@ is also specified, an entry will match only if it has this value. See the .Sx LOOKUP TABLES section below for more information on lookup tables. 
+.El .It Ar addr-list : ip-addr Ns Op Ns , Ns Ar addr-list .It Ar ip-addr : A host or subnet address specified in one of the following ways: @@ -1389,6 +1396,20 @@ of source and destination addresses and ports can be specified. Currently, only IPv4 flows are supported. +.It Cm lookup Bro Cm dst-ip | dst-port | src-ip | src-port | uid | jail Brc Ar N +Search an entry in lookup table +.Ar N +that matches the field specified as argument. +If not found, the match fails. +Otherwise, the match succeeds and +.Cm tablearg +is set to the value extracted from the table. +.Pp +This option can be useful to quickly dispatch traffic based on +certain packet fields. +See the +.Sx LOOKUP TABLES +section below for more information on lookup tables. .It Cm { MAC | mac } Ar dst-mac src-mac Match packets with a given .Ar dst-mac @@ -1480,7 +1501,7 @@ is invalid) whenever .Cm xmit is used. .Pp -A packet may not have a receive or transmit interface: packets +A packet might not have a receive or transmit interface: packets originating from the local host have no receive interface, while packets destined for the local host have no transmit interface. @@ -1627,15 +1648,17 @@ because it engages only on packets with source addresses of directly connected networks instead of all source addresses. .El .Sh LOOKUP TABLES -Lookup tables are useful to handle large sparse address sets, -typically from a hundred to several thousands of entries. +Lookup tables are useful to handle large sparse sets of +addresses or other search keys (e.g. ports, jail IDs). +In the rest of this section we will use the term ``address'' +to mean any unsigned value of up to 32-bit. There may be up to 128 different lookup tables, numbered 0 to 127. .Pp Each entry is represented by an .Ar addr Ns Op / Ns Ar masklen and will match all addresses with base .Ar addr -(specified as an IP address or a hostname) +(specified as an IP address, a hostname or an unsigned integer) and mask width of .Ar masklen bits. 
@@ -1653,9 +1676,9 @@ is not specified, it defaults to 0. .Pp An entry can be added to a table .Pq Cm add , -removed from a table -.Pq Cm delete , -a table can be examined +or removed from a table +.Pq Cm delete . +A table can be examined .Pq Cm list or flushed .Pq Cm flush . @@ -1664,7 +1687,7 @@ Internally, each table is stored in a Radix tree, the same way as the routing table (see .Xr route 4 ) . .Pp -Lookup tables currently support IPv4 addresses only. +Lookup tables currently support only ports, jail IDs and IPv4 addresses. .Pp The .Cm tablearg @@ -1822,9 +1845,9 @@ for more examples on how to use dynamic rules. .Nm is also the user interface for the .Nm dummynet -traffic shaper and network emulator, a subsystem that +traffic shaper, packet scheduler and network emulator, a subsystem that can artificially queue, delay or drop packets -emulator the behaviour of certain network links +emulating the behaviour of certain network links or queueing systems. .Pp .Nm dummynet @@ -1836,26 +1859,33 @@ Matching packets are then passed to either of two different objects, which implement the traffic regulation: .Bl -hang -offset XXXX .It Em pipe -A pipe emulates a link with given bandwidth, propagation delay, +A +.Em pipe +emulates a +.Em link +with given bandwidth and propagation delay, +driven by a FIFO scheduler and a single queue with programmable queue size and packet loss rate. -Packets are queued in front of the pipe as they come out from the classifier, -and then transferred to the pipe according to the pipe's parameters. +Packets are appended to the queue as they come out from +.Nm ipfw , +and then transferred in FIFO order to the link at the desired rate. .It Em queue -A queue -is an abstraction used to implement the WF2Q+ -(Worst-case Fair Weighted Fair Queueing) policy, which is -an efficient variant of the WFQ policy. 
-.Pp -The queue associates a -.Em weight -and a reference pipe to each flow (a flow is a set of packets -with the same addresses and ports after masking). -All backlogged flows (i.e., those -with packets queued) linked to the same pipe share the pipe's -bandwidth proportionally to their weights. -Note that weights are not priorities; a flow with a lower weight -is still guaranteed to get its fraction of the bandwidth even if a -flow with a higher weight is permanently backlogged. +A +.Em queue +is an abstraction used to implement packet scheduling +using one of several packet scheduling algorithms. +Packets sent to a +.Em queue +are first grouped into flows according to a mask on the 5-tuple. +Flows are then passed to the scheduler associated to the +.Em queue , +and each flow uses scheduling parameters (weight and others) +as configured in the +.Em queue +itself. +A scheduler in turn is connected to an emulated link, +and arbitrates the link's bandwidth among backlogged flows according to +weights and to the features of the scheduling algorithm in use. .El .Pp In practice, @@ -1864,6 +1894,52 @@ can be used to set hard limits to the bandwidth that a flow can use, whereas .Em queues can be used to determine how different flows share the available bandwidth. .Pp +A graphical representation of the binding of queues, +flows, schedulers and links is below. +.Bd -literal -offset indent + (flow_mask|sched_mask) sched_mask + +---------+ weight Wx +-------------+ + | |->-[flow]-->--| |-+ + -->--| QUEUE x | ... | | | + | |->-[flow]-->--| SCHEDuler N | | + +---------+ | | | + ... | +--[LINK N]-->-- + +---------+ weight Wy | | +--[LINK N]-->-- + | |->-[flow]-->--| | | + -->--| QUEUE y | ... | | | + | |->-[flow]-->--| | | + +---------+ +-------------+ | + +-------------+ +.Ed +It is important to understand the role of the SCHED_MASK +and FLOW_MASK, which are configured through the commands +.Dl "ipfw sched N config mask SCHED_MASK ..." 
+and +.Dl "ipfw queue X config mask FLOW_MASK ..." . +.Pp +The SCHED_MASK is used to assign flows to one or more +scheduler instances, one for each +value of the packet's 5-tuple after applying SCHED_MASK. +As an example, using ``src-ip 0xffffff00'' creates one instance +for each /24 source subnet. +.Pp +The FLOW_MASK, together with the SCHED_MASK, is used to split +packets into flows. As an example, using +``src-ip 0x000000ff'' +together with the previous SCHED_MASK makes a flow for +each individual source address. In turn, flows for each /24 +subnet will be sent to the same scheduler instance. +.Pp +The above diagram holds even for the +.Em pipe +case, with the only restriction that a +.Em pipe +only supports a SCHED_MASK, and forces the use of a FIFO +scheduler (these are for backward compatibility reasons; +in fact, internally, a +.Nm dummynet's +pipe is implemented exactly as above). +.Pp There are two modes of .Nm dummynet operation: @@ -1895,16 +1971,19 @@ mode can be enabled by setting the .Xr sysctl 8 variable to a non-zero value. .Pp -.Ss PIPE AND QUEUE CONFIGURATION +.Ss PIPE, QUEUE AND SCHEDULER CONFIGURATION The -.Em pipe -and +.Em pipe , .Em queue +and +.Em scheduler configuration commands are the following: .Bd -ragged -offset indent .Cm pipe Ar number Cm config Ar pipe-configuration .Pp .Cm queue Ar number Cm config Ar queue-configuration +.Pp +.Cm sched Ar number Cm config Ar sched-configuration .Ed .Pp The following parameters can be configured for a pipe: @@ -2057,6 +2136,41 @@ Specifies the weight to be used for flows matching this queue. The weight must be in the range 1..100, and defaults to 1. .El .Pp +The following parameters can be configured for a scheduler: +.Pp +.Bl -tag -width indent -compact +.It Cm type Ar {fifo | wf2qp | rr | qfq} +specifies the scheduling algorithm to use. 
+.Bl -tag -width indent -compact +.It Cm fifo +is just a FIFO scheduler (which means that all packets +are stored in the same queue as they arrive to the scheduler). +FIFO has O(1) per-packet time complexity, with very low +constants (estimate 60-80ns on a 2GHz desktop machine) +but gives no service guarantees. +.It Cm wf2qp +implements the WF2Q+ algorithm, which is a Weighted Fair Queueing +algorithm which permits flows to share bandwidth according to +their weights. Note that weights are not priorities; even a flow +with a minuscule weight will never starve. +WF2Q+ has O(log N) per-packet processing cost, where N is the number +of flows, and is the default algorithm used by previous versions of +dummynet's queues. +.It Cm rr +implements the Deficit Round Robin algorithm, which has O(1) processing +costs (roughly, 100-150ns per packet) +and permits bandwidth allocation according to weights, but +with poor service guarantees. +.It Cm qfq +implements the QFQ algorithm, which is a very fast variant of +WF2Q+, with similar service guarantees and O(1) processing +costs (roughly, 200-250ns per packet). +.El +.El +.Pp +In addition to the type, all parameters allowed for a pipe can also +be specified for a scheduler. 
+.Pp Finally, the following parameters can be configured for both pipes and queues: .Pp diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c index b19f390..1ab827f 100644 --- a/sbin/ipfw/ipfw2.c +++ b/sbin/ipfw/ipfw2.c @@ -57,7 +57,7 @@ struct cmdline_opts co; /* global options */ int resvd_set_number = RESVD_SET; #define GET_UINT_ARG(arg, min, max, tok, s_x) do { \ - if (!ac) \ + if (!av[0]) \ errx(EX_USAGE, "%s: missing argument", match_value(s_x, tok)); \ if (_substrcmp(*av, "tablearg") == 0) { \ arg = IP_FW_TABLEARG; \ @@ -65,23 +65,23 @@ int resvd_set_number = RESVD_SET; } \ \ { \ - long val; \ + long _xval; \ char *end; \ \ - val = strtol(*av, &end, 10); \ + _xval = strtol(*av, &end, 10); \ \ - if (!isdigit(**av) || *end != '\0' || (val == 0 && errno == EINVAL)) \ + if (!isdigit(**av) || *end != '\0' || (_xval == 0 && errno == EINVAL)) \ errx(EX_DATAERR, "%s: invalid argument: %s", \ match_value(s_x, tok), *av); \ \ - if (errno == ERANGE || val < min || val > max) \ + if (errno == ERANGE || _xval < min || _xval > max) \ errx(EX_DATAERR, "%s: argument is out of range (%u..%u): %s", \ match_value(s_x, tok), min, max, *av); \ \ - if (val == IP_FW_TABLEARG) \ + if (_xval == IP_FW_TABLEARG) \ errx(EX_DATAERR, "%s: illegal argument value: %s", \ match_value(s_x, tok), *av); \ - arg = val; \ + arg = _xval; \ } \ } while (0) @@ -224,6 +224,15 @@ static struct _s_x rule_action_params[] = { { NULL, 0 } /* terminator */ }; +/* + * The 'lookup' instruction accepts one of the following arguments. + * -1 is a terminator for the list. + * Arguments are passed as v[1] in O_DST_LOOKUP options. 
+ */ +static int lookup_key[] = { + TOK_DSTIP, TOK_SRCIP, TOK_DSTPORT, TOK_SRCPORT, + TOK_UID, TOK_JAIL, TOK_DSCP, -1 }; + static struct _s_x rule_options[] = { { "tagged", TOK_TAGGED }, { "uid", TOK_UID }, @@ -249,6 +258,7 @@ static struct _s_x rule_options[] = { { "iplen", TOK_IPLEN }, { "ipid", TOK_IPID }, { "ipprecedence", TOK_IPPRECEDENCE }, + { "dscp", TOK_DSCP }, { "iptos", TOK_IPTOS }, { "ipttl", TOK_IPTTL }, { "ipversion", TOK_IPVER }, @@ -290,6 +300,7 @@ static struct _s_x rule_options[] = { { "dst-ip6", TOK_DSTIP6}, { "src-ipv6", TOK_SRCIP6}, { "src-ip6", TOK_SRCIP6}, + { "lookup", TOK_LOOKUP}, { "//", TOK_COMMENT }, { "not", TOK_NOT }, /* pseudo option */ @@ -343,6 +354,7 @@ safe_realloc(void *ptr, size_t size) /* * conditionally runs the command. + * Selected options or negative -> getsockopt */ int do_cmd(int optname, void *optval, uintptr_t optlen) @@ -362,11 +374,15 @@ do_cmd(int optname, void *optval, uintptr_t optlen) optname == IP_FW_ADD || optname == IP_FW_TABLE_LIST || optname == IP_FW_TABLE_GETSIZE || optname == IP_FW_NAT_GET_CONFIG || - optname == IP_FW_NAT_GET_LOG) + optname < 0 || + optname == IP_FW_NAT_GET_LOG) { + if (optname < 0) + optname = -optname; i = getsockopt(s, IPPROTO_IP, optname, optval, (socklen_t *)optlen); - else + } else { i = setsockopt(s, IPPROTO_IP, optname, optval, optlen); + } return i; } @@ -739,9 +755,19 @@ static void print_ip(ipfw_insn_ip *cmd, char const *s) { struct hostent *he = NULL; - int len = F_LEN((ipfw_insn *)cmd); + uint32_t len = F_LEN((ipfw_insn *)cmd); uint32_t *a = ((ipfw_insn_u32 *)cmd)->d; + if (cmd->o.opcode == O_IP_DST_LOOKUP && len > F_INSN_SIZE(ipfw_insn_u32)) { + uint32_t d = a[1]; + const char *arg = ""; + + if (d < sizeof(lookup_key)/sizeof(lookup_key[0])) + arg = match_value(rule_options, lookup_key[d]); + printf("%s lookup %s %d", cmd->o.len & F_NOT ? " not": "", + arg, cmd->o.arg1); + return; + } printf("%s%s ", cmd->o.len & F_NOT ? 
" not": "", s); if (cmd->o.opcode == O_IP_SRC_ME || cmd->o.opcode == O_IP_DST_ME) { @@ -1108,9 +1134,11 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) else printf(" log"); } +#ifndef NO_ALTQ if (altqptr) { print_altq_cmd(altqptr); } +#endif if (tagptr) { if (tagptr->len & F_NOT) PRINT_UINT_ARG(" untag ", tagptr->arg1); @@ -1595,26 +1623,33 @@ show_dyn_ipfw(ipfw_dyn_rule *d, int pcwidth, int bcwidth) * ipfw set move rule X to Y */ void -ipfw_sets_handler(int ac, char *av[]) +ipfw_sets_handler(char *av[]) { uint32_t set_disable, masks[2]; int i, nbytes; uint16_t rulenum; uint8_t cmd, new_set; - ac--; av++; - if (!ac) + if (av[0] == NULL) errx(EX_USAGE, "set needs command"); if (_substrcmp(*av, "show") == 0) { - void *data; + void *data = NULL; char const *msg; - - nbytes = sizeof(struct ip_fw); + int nalloc; + + nalloc = nbytes = sizeof(struct ip_fw); + while (nbytes >= nalloc) { + if (data) + free(data); + nalloc = nalloc * 2 + 200; + nbytes = nalloc; data = safe_calloc(1, nbytes); if (do_cmd(IP_FW_GET, data, (uintptr_t)&nbytes) < 0) err(EX_OSERR, "getsockopt(IP_FW_GET)"); + } + bcopy(&((struct ip_fw *)data)->next_rule, &set_disable, sizeof(set_disable)); @@ -1631,8 +1666,8 @@ ipfw_sets_handler(int ac, char *av[]) } printf("\n"); } else if (_substrcmp(*av, "swap") == 0) { - ac--; av++; - if (ac != 2) + av++; + if ( av[0] == NULL || av[1] == NULL ) errx(EX_USAGE, "set swap needs 2 set numbers\n"); rulenum = atoi(av[0]); new_set = atoi(av[1]); @@ -1643,13 +1678,14 @@ ipfw_sets_handler(int ac, char *av[]) masks[0] = (4 << 24) | (new_set << 16) | (rulenum); i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t)); } else if (_substrcmp(*av, "move") == 0) { - ac--; av++; - if (ac && _substrcmp(*av, "rule") == 0) { + av++; + if (av[0] && _substrcmp(*av, "rule") == 0) { cmd = 2; - ac--; av++; + av++; } else cmd = 3; - if (ac != 3 || _substrcmp(av[1], "to") != 0) + if (av[0] == NULL || av[1] == NULL || av[2] == NULL || + av[3] != NULL || _substrcmp(av[1], "to") != 0) 
errx(EX_USAGE, "syntax: set move [rule] X to Y\n"); rulenum = atoi(av[0]); new_set = atoi(av[2]); @@ -1664,10 +1700,10 @@ ipfw_sets_handler(int ac, char *av[]) _substrcmp(*av, "enable") == 0 ) { int which = _substrcmp(*av, "enable") == 0 ? 1 : 0; - ac--; av++; + av++; masks[0] = masks[1] = 0; - while (ac) { + while (av[0]) { if (isdigit(**av)) { i = atoi(*av); if (i < 0 || i > RESVD_SET) @@ -1681,7 +1717,7 @@ ipfw_sets_handler(int ac, char *av[]) else errx(EX_DATAERR, "invalid set command %s\n", *av); - av++; ac--; + av++; } if ( (masks[0] & masks[1]) != 0 ) errx(EX_DATAERR, @@ -1695,12 +1731,11 @@ ipfw_sets_handler(int ac, char *av[]) } void -ipfw_sysctl_handler(int ac, char *av[], int which) +ipfw_sysctl_handler(char *av[], int which) { - ac--; av++; - if (ac == 0) { + if (av[0] == NULL) { warnx("missing keyword to enable/disable\n"); } else if (_substrcmp(*av, "firewall") == 0) { sysctlbyname("net.inet.ip.fw.enable", NULL, 0, @@ -1717,8 +1752,10 @@ ipfw_sysctl_handler(int ac, char *av[], int which) } else if (_substrcmp(*av, "dyn_keepalive") == 0) { sysctlbyname("net.inet.ip.fw.dyn_keepalive", NULL, 0, &which, sizeof(which)); +#ifndef NO_ALTQ } else if (_substrcmp(*av, "altq") == 0) { altq_set_enabled(which); +#endif } else { warnx("unrecognize enable/disable keyword: %s\n", *av); } @@ -1751,6 +1788,10 @@ ipfw_list(int ac, char *av[], int show_counters) fprintf(stderr, "Testing only, list disabled\n"); return; } + if (co.do_pipe) { + dummynet_list(ac, av, show_counters); + return; + } ac--; av++; @@ -1767,11 +1808,6 @@ ipfw_list(int ac, char *av[], int show_counters) co.do_pipe ? "DUMMYNET" : "FW"); } - if (co.do_pipe) { - ipfw_list_pipes(data, nbytes, ac, av); - goto done; - } - /* * Count static rules. They have variable size so we * need to scan the list to count them. 
@@ -2119,7 +2155,7 @@ fill_ip(ipfw_insn_ip *cmd, char *av) return; } /* A single IP can be stored in an optimized format */ - if (d[1] == ~0 && av == NULL && len == 0) { + if (d[1] == (uint32_t)~0 && av == NULL && len == 0) { cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32); return; } @@ -2188,29 +2224,28 @@ fill_flags(ipfw_insn *cmd, enum ipfw_opcodes opcode, void -ipfw_delete(int ac, char *av[]) +ipfw_delete(char *av[]) { uint32_t rulenum; int i; int exitval = EX_OK; int do_set = 0; - - av++; ac--; + av++; NEED1("missing rule specification"); - if (ac > 0 && _substrcmp(*av, "set") == 0) { + if ( *av && _substrcmp(*av, "set") == 0) { /* Do not allow using the following syntax: * ipfw set N delete set M */ if (co.use_set) errx(EX_DATAERR, "invalid syntax"); do_set = 1; /* delete set */ - ac--; av++; + av++; } /* Rule number */ - while (ac && isdigit(**av)) { - i = atoi(*av); av++; ac--; + while (*av && isdigit(**av)) { + i = atoi(*av); av++; if (co.do_nat) { exitval = do_cmd(IP_FW_NAT_DEL, &i, sizeof i); if (exitval) { @@ -2264,7 +2299,8 @@ fill_iface(ipfw_insn_if *cmd, char *arg) static void get_mac_addr_mask(const char *p, uint8_t *addr, uint8_t *mask) { - int i, l; + int i; + size_t l; char *ap, *ptr, *optr; struct ether_addr *mac; const char *macset = "0123456789abcdefABCDEF:"; @@ -2286,11 +2322,11 @@ get_mac_addr_mask(const char *p, uint8_t *addr, uint8_t *mask) if (ptr != NULL) { /* we have mask? */ if (p[ptr - optr - 1] == '/') { /* mask len */ - l = strtol(ptr, &ap, 10); - if (*ap != 0 || l > ETHER_ADDR_LEN * 8 || l < 0) + long ml = strtol(ptr, &ap, 10); + if (*ap != 0 || ml > ETHER_ADDR_LEN * 8 || ml < 0) errx(EX_DATAERR, "Incorrect mask length"); - for (i = 0; l > 0 && i < ETHER_ADDR_LEN; l -= 8, i++) - mask[i] = (l >= 8) ? 0xff: (~0) << (8 - l); + for (i = 0; ml > 0 && i < ETHER_ADDR_LEN; ml -= 8, i++) + mask[i] = (ml >= 8) ? 
0xff: (~0) << (8 - ml); } else { /* mask */ l = strlen(ptr); if (strspn(ptr, macset) != l || @@ -2325,7 +2361,7 @@ next_cmd(ipfw_insn *cmd) * Takes arguments and copies them into a comment */ static void -fill_comment(ipfw_insn *cmd, int ac, char **av) +fill_comment(ipfw_insn *cmd, char **av) { int i, l; char *p = (char *)(cmd + 1); @@ -2334,7 +2370,7 @@ fill_comment(ipfw_insn *cmd, int ac, char **av) cmd->len = (cmd->len & (F_NOT | F_OR)); /* Compute length of comment string. */ - for (i = 0, l = 0; i < ac; i++) + for (i = 0, l = 0; av[i] != NULL; i++) l += strlen(av[i]) + 1; if (l == 0) return; @@ -2343,7 +2379,7 @@ fill_comment(ipfw_insn *cmd, int ac, char **av) "comment too long (max 80 chars)"); l = 1 + (l+3)/4; cmd->len = (cmd->len & (F_NOT | F_OR)) | l; - for (i = 0; i < ac; i++) { + for (i = 0; av[i] != NULL; i++) { strcpy(p, av[i]); p += strlen(av[i]); *p++ = ' '; @@ -2368,11 +2404,11 @@ fill_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, int flags, uint16_t arg) * two microinstructions, and returns the pointer to the last one. */ static ipfw_insn * -add_mac(ipfw_insn *cmd, int ac, char *av[]) +add_mac(ipfw_insn *cmd, char *av[]) { ipfw_insn_mac *mac; - if (ac < 2) + if ( ( av[0] == NULL ) || ( av[1] == NULL ) ) errx(EX_DATAERR, "MAC dst src"); cmd->opcode = O_MACADDR2; @@ -2386,9 +2422,9 @@ add_mac(ipfw_insn *cmd, int ac, char *av[]) } static ipfw_insn * -add_mactype(ipfw_insn *cmd, int ac, char *av) +add_mactype(ipfw_insn *cmd, char *av) { - if (ac < 1) + if (!av) errx(EX_DATAERR, "missing MAC type"); if (strcmp(av, "any") != 0) { /* we have a non-null type */ fill_newports((ipfw_insn_u16 *)cmd, av, IPPROTO_ETHERTYPE); @@ -2496,6 +2532,7 @@ add_dstip(ipfw_insn *cmd, char *av) static ipfw_insn * add_ports(ipfw_insn *cmd, char *av, u_char proto, int opcode) { + /* XXX "any" is trapped before. 
Perhaps "to" */ if (_substrcmp(av, "any") == 0) { return NULL; } else if (fill_newports((ipfw_insn_u16 *)cmd, av, proto)) { @@ -2519,11 +2556,11 @@ add_src(ipfw_insn *cmd, char *av, u_char proto) *ch = '\0'; if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 || - inet_pton(AF_INET6, host, &a)) + inet_pton(AF_INET6, host, &a) == 1) ret = add_srcip6(cmd, av); /* XXX: should check for IPv4, not !IPv6 */ if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || - !inet_pton(AF_INET6, host, &a))) + inet_pton(AF_INET6, host, &a) != 1)) ret = add_srcip(cmd, av); if (ret == NULL && strcmp(av, "any") != 0) ret = cmd; @@ -2545,11 +2582,11 @@ add_dst(ipfw_insn *cmd, char *av, u_char proto) *ch = '\0'; if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 || - inet_pton(AF_INET6, host, &a)) + inet_pton(AF_INET6, host, &a) == 1) ret = add_dstip6(cmd, av); /* XXX: should check for IPv4, not !IPv6 */ if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || - !inet_pton(AF_INET6, host, &a))) + inet_pton(AF_INET6, host, &a) != 1)) ret = add_dstip(cmd, av); if (ret == NULL && strcmp(av, "any") != 0) ret = cmd; @@ -2571,7 +2608,7 @@ add_dst(ipfw_insn *cmd, char *av, u_char proto) * */ void -ipfw_add(int ac, char *av[]) +ipfw_add(char *av[]) { /* * rules are added into the 'rulebuf' and then copied in @@ -2610,37 +2647,36 @@ ipfw_add(int ac, char *av[]) cmd = (ipfw_insn *)cmdbuf; action = (ipfw_insn *)actbuf; - av++; ac--; + av++; /* [rule N] -- Rule number optional */ - if (ac && isdigit(**av)) { + if (av[0] && isdigit(**av)) { rule->rulenum = atoi(*av); av++; - ac--; } /* [set N] -- set number (0..RESVD_SET), optional */ - if (ac > 1 && _substrcmp(*av, "set") == 0) { + if (av[0] && !av[1] && _substrcmp(*av, "set") == 0) { int set = strtoul(av[1], NULL, 10); if (set < 0 || set > RESVD_SET) errx(EX_DATAERR, "illegal set %s", av[1]); rule->set = set; - av += 2; ac -= 2; + av += 2; } /* [prob D] -- match probability, optional */ - if (ac > 1 && _substrcmp(*av, 
"prob") == 0) { + if (av[0] && av[1] && _substrcmp(*av, "prob") == 0) { match_prob = strtod(av[1], NULL); if (match_prob <= 0 || match_prob > 1) errx(EX_DATAERR, "illegal match prob. %s", av[1]); - av += 2; ac -= 2; + av += 2; } /* action -- mandatory */ NEED1("missing action"); i = match_token(rule_actions, *av); - ac--; av++; + av++; action->len = 1; /* default */ switch(i) { case TOK_CHECKSTATE: @@ -2676,14 +2712,14 @@ ipfw_add(int ac, char *av[]) action->opcode = O_REJECT; NEED1("missing reject code"); fill_reject_code(&action->arg1, *av); - ac--; av++; + av++; break; case TOK_UNREACH6: action->opcode = O_UNREACH6; NEED1("missing unreach code"); fill_unreach6_code(&action->arg1, *av); - ac--; av++; + av++; break; case TOK_COUNT: @@ -2716,7 +2752,7 @@ ipfw_add(int ac, char *av[]) case TOK_TEE: action->opcode = O_TEE; chkarg: - if (!ac) + if (!av[0]) errx(EX_USAGE, "missing argument for %s", *(av - 1)); if (isdigit(**av)) { action->arg1 = strtoul(*av, NULL, 10); @@ -2735,7 +2771,7 @@ chkarg: errx(EX_DATAERR, "illegal divert/tee port"); } else errx(EX_DATAERR, "illegal argument for %s", *(av - 1)); - ac--; av++; + av++; break; case TOK_FORWARD: { @@ -2773,13 +2809,13 @@ chkarg: p->sa.sin_addr.s_addr = INADDR_ANY; else lookup_host(*av, &(p->sa.sin_addr)); - ac--; av++; + av++; break; } case TOK_COMMENT: /* pretend it is a 'count' rule followed by the comment */ action->opcode = O_COUNT; - ac++; av--; /* go back... */ + av--; /* go back... */ break; case TOK_SETFIB: @@ -2794,7 +2830,7 @@ chkarg: errx(EX_DATAERR, "fibs not suported.\n"); if (action->arg1 >= numfibs) /* Temporary */ errx(EX_DATAERR, "fib too large.\n"); - ac--; av++; + av++; break; } @@ -2814,8 +2850,8 @@ chkarg: * If they exist, it go first in the cmdbuf, but then it is * skipped in the copy section to the end of the buffer. 
*/ - while (ac != 0 && (i = match_token(rule_action_params, *av)) != -1) { - ac--; av++; + while (av[0] != NULL && (i = match_token(rule_action_params, *av)) != -1) { + av++; switch (i) { case TOK_LOG: { @@ -2828,15 +2864,15 @@ chkarg: have_log = (ipfw_insn *)c; cmd->len = F_INSN_SIZE(ipfw_insn_log); cmd->opcode = O_LOG; - if (ac && _substrcmp(*av, "logamount") == 0) { - ac--; av++; + if (av[0] && _substrcmp(*av, "logamount") == 0) { + av++; NEED1("logamount requires argument"); l = atoi(*av); if (l < 0) errx(EX_DATAERR, "logamount must be positive"); c->max_log = l; - ac--; av++; + av++; } else { len = sizeof(c->max_log); if (sysctlbyname("net.inet.ip.fw.verbose_limit", @@ -2847,6 +2883,7 @@ chkarg: } break; +#ifndef NO_ALTQ case TOK_ALTQ: { ipfw_insn_altq *a = (ipfw_insn_altq *)cmd; @@ -2859,9 +2896,10 @@ chkarg: cmd->len = F_INSN_SIZE(ipfw_insn_altq); cmd->opcode = O_ALTQ; a->qid = altq_name_to_qid(*av); - ac--; av++; + av++; } break; +#endif case TOK_TAG: case TOK_UNTAG: { @@ -2874,7 +2912,7 @@ chkarg: rule_action_params); have_tag = cmd; fill_cmd(cmd, O_TAG, (i == TOK_TAG) ? 
0: F_NOT, tag); - ac--; av++; + av++; break; } @@ -2888,13 +2926,13 @@ chkarg: goto done; #define OR_START(target) \ - if (ac && (*av[0] == '(' || *av[0] == '{')) { \ + if (av[0] && (*av[0] == '(' || *av[0] == '{')) { \ if (open_par) \ errx(EX_USAGE, "nested \"(\" not allowed\n"); \ prev = NULL; \ open_par = 1; \ if ( (av[0])[1] == '\0') { \ - ac--; av++; \ + av++; \ } else \ (*av)++; \ } \ @@ -2903,30 +2941,30 @@ chkarg: #define CLOSE_PAR \ if (open_par) { \ - if (ac && ( \ + if (av[0] && ( \ strcmp(*av, ")") == 0 || \ strcmp(*av, "}") == 0)) { \ prev = NULL; \ open_par = 0; \ - ac--; av++; \ + av++; \ } else \ errx(EX_USAGE, "missing \")\"\n"); \ } #define NOT_BLOCK \ - if (ac && _substrcmp(*av, "not") == 0) { \ + if (av[0] && _substrcmp(*av, "not") == 0) { \ if (cmd->len & F_NOT) \ errx(EX_USAGE, "double \"not\" not allowed\n"); \ cmd->len |= F_NOT; \ - ac--; av++; \ + av++; \ } #define OR_BLOCK(target) \ - if (ac && _substrcmp(*av, "or") == 0) { \ + if (av[0] && _substrcmp(*av, "or") == 0) { \ if (prev == NULL || open_par == 0) \ errx(EX_DATAERR, "invalid OR block"); \ prev->len |= F_OR; \ - ac--; av++; \ + av++; \ goto target; \ } \ CLOSE_PAR; @@ -2943,15 +2981,15 @@ chkarg: NEED1("missing protocol"); if (_substrcmp(*av, "MAC") == 0 || _substrcmp(*av, "mac") == 0) { - ac--; av++; /* the "MAC" keyword */ - add_mac(cmd, ac, av); /* exits in case of errors */ + av++; /* the "MAC" keyword */ + add_mac(cmd, av); /* exits in case of errors */ cmd = next_cmd(cmd); - ac -= 2; av += 2; /* dst-mac and src-mac */ + av += 2; /* dst-mac and src-mac */ NOT_BLOCK; NEED1("missing mac type"); - if (add_mactype(cmd, ac, av[0])) + if (add_mactype(cmd, av[0])) cmd = next_cmd(cmd); - ac--; av++; /* any or mac-type */ + av++; /* any or mac-type */ goto read_options; } #endif @@ -2963,7 +3001,7 @@ chkarg: NOT_BLOCK; NEED1("missing protocol"); if (add_proto_compat(cmd, *av, &proto)) { - av++; ac--; + av++; if (F_LEN(cmd) != 0) { prev = cmd; cmd = next_cmd(cmd); @@ -2977,9 +3015,9 @@ 
chkarg: /* * "from", mandatory */ - if (!ac || _substrcmp(*av, "from") != 0) + if ((av[0] == NULL) || _substrcmp(*av, "from") != 0) errx(EX_USAGE, "missing ``from''"); - ac--; av++; + av++; /* * source IP, mandatory @@ -2988,7 +3026,7 @@ chkarg: NOT_BLOCK; /* optional "not" */ NEED1("missing source address"); if (add_src(cmd, *av, proto)) { - ac--; av++; + av++; if (F_LEN(cmd) != 0) { /* ! any */ prev = cmd; cmd = next_cmd(cmd); @@ -3001,10 +3039,10 @@ chkarg: * source ports, optional */ NOT_BLOCK; /* optional "not" */ - if (ac) { + if ( av[0] != NULL ) { if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_SRCPORT)) { - ac--; av++; + av++; if (F_LEN(cmd) != 0) cmd = next_cmd(cmd); } @@ -3013,9 +3051,9 @@ chkarg: /* * "to", mandatory */ - if (!ac || _substrcmp(*av, "to") != 0) + if ( (av[0] == NULL) || _substrcmp(*av, "to") != 0 ) errx(EX_USAGE, "missing ``to''"); - av++; ac--; + av++; /* * destination, mandatory @@ -3024,7 +3062,7 @@ chkarg: NOT_BLOCK; /* optional "not" */ NEED1("missing dst address"); if (add_dst(cmd, *av, proto)) { - ac--; av++; + av++; if (F_LEN(cmd) != 0) { /* ! any */ prev = cmd; cmd = next_cmd(cmd); @@ -3037,17 +3075,17 @@ chkarg: * dest. ports, optional */ NOT_BLOCK; /* optional "not" */ - if (ac) { + if (av[0]) { if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_DSTPORT)) { - ac--; av++; + av++; if (F_LEN(cmd) != 0) cmd = next_cmd(cmd); } } read_options: - if (ac && first_cmd == cmd) { + if (av[0] && first_cmd == cmd) { /* * nothing specified so far, store in the rule to ease * printout later. 
@@ -3055,7 +3093,7 @@ read_options: rule->_pad = 1; } prev = NULL; - while (ac) { + while ( av[0] != NULL ) { char *s; ipfw_insn_u32 *cmd32; /* alias for cmd */ @@ -3069,7 +3107,7 @@ read_options: s++; } i = match_token(rule_options, s); - ac--; av++; + av++; switch(i) { case TOK_NOT: if (cmd->len & F_NOT) @@ -3131,7 +3169,7 @@ read_options: NEED1("recv, xmit, via require interface name" " or address"); fill_iface((ipfw_insn_if *)cmd, av[0]); - ac--; av++; + av++; if (F_LEN(cmd) == 0) /* not a valid address */ break; if (i == TOK_XMIT) @@ -3145,13 +3183,13 @@ read_options: case TOK_ICMPTYPES: NEED1("icmptypes requires list of types"); fill_icmptypes((ipfw_insn_u32 *)cmd, *av); - av++; ac--; + av++; break; case TOK_ICMP6TYPES: NEED1("icmptypes requires list of types"); fill_icmp6types((ipfw_insn_icmp6 *)cmd, *av); - av++; ac--; + av++; break; case TOK_IPTTL: @@ -3161,7 +3199,7 @@ read_options: errx(EX_DATAERR, "invalid ipttl %s", *av); } else fill_cmd(cmd, O_IPTTL, 0, strtoul(*av, NULL, 0)); - ac--; av++; + av++; break; case TOK_IPID: @@ -3171,7 +3209,7 @@ read_options: errx(EX_DATAERR, "invalid ipid %s", *av); } else fill_cmd(cmd, O_IPID, 0, strtoul(*av, NULL, 0)); - ac--; av++; + av++; break; case TOK_IPLEN: @@ -3181,32 +3219,32 @@ read_options: errx(EX_DATAERR, "invalid ip len %s", *av); } else fill_cmd(cmd, O_IPLEN, 0, strtoul(*av, NULL, 0)); - ac--; av++; + av++; break; case TOK_IPVER: NEED1("ipver requires version"); fill_cmd(cmd, O_IPVER, 0, strtoul(*av, NULL, 0)); - ac--; av++; + av++; break; case TOK_IPPRECEDENCE: NEED1("ipprecedence requires value"); fill_cmd(cmd, O_IPPRECEDENCE, 0, (strtoul(*av, NULL, 0) & 7) << 5); - ac--; av++; + av++; break; case TOK_IPOPTS: NEED1("missing argument for ipoptions"); fill_flags(cmd, O_IPOPT, f_ipopts, *av); - ac--; av++; + av++; break; case TOK_IPTOS: NEED1("missing argument for iptos"); fill_flags(cmd, O_IPTOS, f_iptos, *av); - ac--; av++; + av++; break; case TOK_UID: @@ -3223,7 +3261,7 @@ read_options: errx(EX_DATAERR, 
"uid \"%s\" nonexistent", *av); cmd32->d[0] = pwd->pw_uid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); - ac--; av++; + av++; } break; @@ -3241,7 +3279,7 @@ read_options: errx(EX_DATAERR, "gid \"%s\" nonexistent", *av); cmd32->d[0] = grp->gr_gid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); - ac--; av++; + av++; } break; @@ -3257,7 +3295,7 @@ read_options: errx(EX_DATAERR, "jail requires prison ID"); cmd32->d[0] = (uint32_t)jid; cmd->len |= F_INSN_SIZE(ipfw_insn_u32); - ac--; av++; + av++; } break; @@ -3278,13 +3316,13 @@ read_options: } else fill_cmd(cmd, O_TCPDATALEN, 0, strtoul(*av, NULL, 0)); - ac--; av++; + av++; break; case TOK_TCPOPTS: NEED1("missing argument for tcpoptions"); fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av); - ac--; av++; + av++; break; case TOK_TCPSEQ: @@ -3293,21 +3331,21 @@ read_options: cmd->len = F_INSN_SIZE(ipfw_insn_u32); cmd->opcode = (i == TOK_TCPSEQ) ? O_TCPSEQ : O_TCPACK; cmd32->d[0] = htonl(strtoul(*av, NULL, 0)); - ac--; av++; + av++; break; case TOK_TCPWIN: NEED1("tcpwin requires length"); fill_cmd(cmd, O_TCPWIN, 0, htons(strtoul(*av, NULL, 0))); - ac--; av++; + av++; break; case TOK_TCPFLAGS: NEED1("missing argument for tcpflags"); cmd->opcode = O_TCPFLAGS; fill_flags(cmd, O_TCPFLAGS, f_tcpflags, *av); - ac--; av++; + av++; break; case TOK_KEEPSTATE: @@ -3337,11 +3375,11 @@ read_options: cmd->opcode = O_LIMIT; c->limit_mask = c->conn_limit = 0; - while (ac > 0) { + while ( av[0] != NULL ) { if ((val = match_token(limit_masks, *av)) <= 0) break; c->limit_mask |= val; - ac--; av++; + av++; } if (c->limit_mask == 0) @@ -3350,14 +3388,14 @@ read_options: GET_UINT_ARG(c->conn_limit, IPFW_ARG_MIN, IPFW_ARG_MAX, TOK_LIMIT, rule_options); - ac--; av++; + av++; break; } case TOK_PROTO: NEED1("missing protocol"); if (add_proto(cmd, *av, &proto)) { - ac--; av++; + av++; } else errx(EX_DATAERR, "invalid protocol ``%s''", *av); @@ -3366,28 +3404,28 @@ read_options: case TOK_SRCIP: NEED1("missing source IP"); if (add_srcip(cmd, *av)) { - ac--; av++; + 
av++; } break; case TOK_DSTIP: NEED1("missing destination IP"); if (add_dstip(cmd, *av)) { - ac--; av++; + av++; } break; case TOK_SRCIP6: NEED1("missing source IP6"); if (add_srcip6(cmd, *av)) { - ac--; av++; + av++; } break; case TOK_DSTIP6: NEED1("missing destination IP6"); if (add_dstip6(cmd, *av)) { - ac--; av++; + av++; } break; @@ -3395,7 +3433,7 @@ read_options: NEED1("missing source port"); if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_SRCPORT)) { - ac--; av++; + av++; } else errx(EX_DATAERR, "invalid source port %s", *av); break; @@ -3404,23 +3442,22 @@ read_options: NEED1("missing destination port"); if (_substrcmp(*av, "any") == 0 || add_ports(cmd, *av, proto, O_IP_DSTPORT)) { - ac--; av++; + av++; } else errx(EX_DATAERR, "invalid destination port %s", *av); break; case TOK_MAC: - if (add_mac(cmd, ac, av)) { - ac -= 2; av += 2; - } + if (add_mac(cmd, av)) + av += 2; break; case TOK_MACTYPE: NEED1("missing mac type"); - if (!add_mactype(cmd, ac, *av)) + if (!add_mactype(cmd, *av)) errx(EX_DATAERR, "invalid mac type %s", *av); - ac--; av++; + av++; break; case TOK_VERREVPATH: @@ -3449,7 +3486,7 @@ read_options: case TOK_EXT6HDR: fill_ext6hdr( cmd, *av ); - ac--; av++; + av++; break; case TOK_FLOWID: @@ -3457,17 +3494,16 @@ read_options: errx( EX_USAGE, "flow-id filter is active " "only for ipv6 protocol\n"); fill_flow6( (ipfw_insn_u32 *) cmd, *av ); - ac--; av++; + av++; break; case TOK_COMMENT: - fill_comment(cmd, ac, av); - av += ac; - ac = 0; + fill_comment(cmd, av); + av[0]=NULL; break; case TOK_TAGGED: - if (ac > 0 && strpbrk(*av, "-,")) { + if (av[0] && strpbrk(*av, "-,")) { if (!add_ports(cmd, *av, 0, O_TAGGED)) errx(EX_DATAERR, "tagged: invalid tag" " list: %s", *av); @@ -3479,13 +3515,38 @@ read_options: TOK_TAGGED, rule_options); fill_cmd(cmd, O_TAGGED, 0, tag); } - ac--; av++; + av++; break; case TOK_FIB: NEED1("fib requires fib number"); fill_cmd(cmd, O_FIB, 0, strtoul(*av, NULL, 0)); - ac--; av++; + av++; + break; + + 
case TOK_LOOKUP: { + ipfw_insn_u32 *c = (ipfw_insn_u32 *)cmd; + char *p; + int j; + + if (!av[0] || !av[1]) + errx(EX_USAGE, "format: lookup argument tablenum"); + cmd->opcode = O_IP_DST_LOOKUP; + cmd->len |= F_INSN_SIZE(ipfw_insn) + 2; + i = match_token(rule_options, *av); + for (j = 0; lookup_key[j] >= 0 ; j++) { + if (i == lookup_key[j]) + break; + } + if (lookup_key[j] <= 0) + errx(EX_USAGE, "format: cannot lookup on %s", *av); + c->d[1] = j; // i converted to option + av++; + cmd->arg1 = strtoul(*av, &p, 0); + if (p && *p) + errx(EX_USAGE, "format: lookup argument tablenum"); + av++; + } break; default: @@ -3662,6 +3723,10 @@ ipfw_flush(int force) if (c == 'N') /* user said no */ return; } + if (co.do_pipe) { + dummynet_flush(); + return; + } /* `ipfw set N flush` - is the same that `ipfw delete set N` */ if (co.use_set) { uint32_t arg = ((co.use_set - 1) & 0xffff) | (1 << 24); @@ -3775,14 +3840,14 @@ ipfw_table_handler(int ac, char *av[]) } } } else if (_substrcmp(*av, "flush") == 0) { - a = is_all ? tables_max : (ent.tbl + 1); + a = is_all ? tables_max : (uint32_t)(ent.tbl + 1); do { if (do_cmd(IP_FW_TABLE_FLUSH, &ent.tbl, sizeof(ent.tbl)) < 0) err(EX_OSERR, "setsockopt(IP_FW_TABLE_FLUSH)"); } while (++ent.tbl < a); } else if (_substrcmp(*av, "list") == 0) { - a = is_all ? tables_max : (ent.tbl + 1); + a = is_all ? 
tables_max : (uint32_t)(ent.tbl + 1); do { table_list(ent, is_all); } while (++ent.tbl < a); diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h index d3ce7fb..d172984 100644 --- a/sbin/ipfw/ipfw2.h +++ b/sbin/ipfw/ipfw2.h @@ -35,7 +35,7 @@ struct cmdline_opts { int do_resolv; /* try to resolve all ip to names */ int do_time; /* Show time stamps */ int do_quiet; /* Be quiet in add and flush */ - int do_pipe; /* this cmd refers to a pipe */ + int do_pipe; /* this cmd refers to a pipe/queue/sched */ int do_nat; /* this cmd refers to a nat config */ int do_dynamic; /* display dynamic rules */ int do_expired; /* display expired dynamic rules */ @@ -82,7 +82,10 @@ enum tokens { TOK_ACCEPT, TOK_COUNT, TOK_PIPE, + TOK_LINK, TOK_QUEUE, + TOK_FLOWSET, + TOK_SCHED, TOK_DIVERT, TOK_TEE, TOK_NETGRAPH, @@ -122,6 +125,7 @@ enum tokens { TOK_IPLEN, TOK_IPID, TOK_IPPRECEDENCE, + TOK_DSCP, TOK_IPTOS, TOK_IPTTL, TOK_IPVER, @@ -151,15 +155,23 @@ enum tokens { TOK_SRCPORT, TOK_ALL, TOK_MASK, + TOK_FLOW_MASK, + TOK_SCHED_MASK, TOK_BW, TOK_DELAY, - TOK_PIPE_PROFILE, + TOK_PROFILE, TOK_BURST, TOK_RED, TOK_GRED, TOK_DROPTAIL, TOK_PROTO, + /* dummynet tokens */ TOK_WEIGHT, + TOK_LMAX, + TOK_PRI, + TOK_TYPE, + TOK_SLOTSIZE, + TOK_IP, TOK_IF, TOK_ALOG, @@ -186,12 +198,14 @@ enum tokens { TOK_FIB, TOK_SETFIB, + TOK_LOOKUP, }; /* * the following macro returns an error message if we run out of * arguments. 
*/ -#define NEED1(msg) {if (!ac) errx(EX_USAGE, msg);} +#define NEED(_p, msg) {if (!_p) errx(EX_USAGE, msg);} +#define NEED1(msg) {if (!(*av)) errx(EX_USAGE, msg);} unsigned long long align_uint64(const uint64_t *pll); @@ -235,14 +249,14 @@ struct _ipfw_insn_icmp6; extern int resvd_set_number; /* first-level command handlers */ -void ipfw_add(int ac, char *av[]); +void ipfw_add(char *av[]); void ipfw_show_nat(int ac, char **av); void ipfw_config_pipe(int ac, char **av); void ipfw_config_nat(int ac, char **av); -void ipfw_sets_handler(int ac, char *av[]); +void ipfw_sets_handler(char *av[]); void ipfw_table_handler(int ac, char *av[]); -void ipfw_sysctl_handler(int ac, char *av[], int which); -void ipfw_delete(int ac, char *av[]); +void ipfw_sysctl_handler(char *av[], int which); +void ipfw_delete(char *av[]); void ipfw_flush(int force); void ipfw_zero(int ac, char *av[], int optname); void ipfw_list(int ac, char *av[], int show_counters); @@ -254,7 +268,8 @@ u_int32_t altq_name_to_qid(const char *name); void print_altq_cmd(struct _ipfw_insn_altq *altqptr); /* dummynet.c */ -void ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]); +void dummynet_list(int ac, char *av[], int show_counters); +void dummynet_flush(void); int ipfw_delete_pipe(int pipe_or_queue, int n); /* ipv6.c */ diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c index 3916057..cd39cf1 100644 --- a/sbin/ipfw/main.c +++ b/sbin/ipfw/main.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003 Luigi Rizzo + * Copyright (c) 2002-2003,2010 Luigi Rizzo * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp * Copyright (c) 1994 Ugen J.S.Antsilevich * @@ -80,31 +80,27 @@ help(void) } /* - * Free a the (locally allocated) copy of command line arguments. - */ -static void -free_args(int ac, char **av) -{ - int i; - - for (i=0; i < ac; i++) - free(av[i]); - free(av); -} - -/* * Called with the arguments, including program name because getopt * wants it to be present. 
* Returns 0 if successful, 1 if empty command, errx() in case of errors. + * First thing we do is process parameters creating an argv[] array + * which includes the program name and a NULL entry at the end. + * If we are called with a single string, we split it on whitespace. + * Also, arguments with a trailing ',' are joined to the next one. + * The pointers (av[]) and data are in a single chunk of memory. + * av[0] points to the original program name, all other entries + * point into the allocated chunk. */ static int ipfw_main(int oldac, char **oldav) { - int ch, ac, save_ac; + int ch, ac; const char *errstr; char **av, **save_av; int do_acct = 0; /* Show packet/byte count */ int try_next = 0; /* set if pipe cmd not found */ + int av_size; /* compute the av size */ + char *av_p; /* used to build the av list */ #define WHITESP " \t\f\v\n\r" if (oldac < 2) @@ -112,10 +108,9 @@ ipfw_main(int oldac, char **oldav) if (oldac == 2) { /* - * If we are called with a single string, try to split it into - * arguments for subsequent parsing. - * But first, remove spaces after a ',', by copying the string - * in-place. + * If we are called with one argument, try to split it into + * words for subsequent parsing. Spaces after a ',' are + * removed by copying the string in-place. */ char *arg = oldav[1]; /* The string is the first arg. */ int l = strlen(arg); @@ -150,31 +145,59 @@ ipfw_main(int oldac, char **oldav) ac++; /* - * Allocate the argument list, including one entry for - * the program name because getopt expects it. + * Allocate the argument list structure as a single block + * of memory, containing pointers and the argument + * strings. We include one entry for the program name + * because getopt expects it, and a NULL at the end + * to simplify further parsing.
*/ - av = safe_calloc(ac + 1, sizeof(char *)); + ac++; /* add 1 for the program name */ + av_size = (ac+1) * sizeof(char *) + l + 1; + av = safe_calloc(av_size, 1); /* - * Second, copy arguments from arg[] to av[]. For each one, + * Init the argument pointer to the end of the array + * and copy arguments from arg[] to av[]. For each one, * j is the initial character, i is the one past the end. */ - for (ac = 1, i = j = 0; i < l; i++) + av_p = (char *)&av[ac+1]; + for (ac = 1, i = j = 0; i < l; i++) { if (index(WHITESP, arg[i]) != NULL || i == l-1) { if (i == l-1) i++; - av[ac] = safe_calloc(i-j+1, 1); - bcopy(arg+j, av[ac], i-j); + bcopy(arg+j, av_p, i-j); + av[ac] = av_p; + av_p += i-j; /* the length of the string */ + *av_p++ = '\0'; ac++; j = i + 1; } + } } else { /* * If an argument ends with ',' join with the next one. */ - int first, i, l; + int first, i, l=0; + + /* + * Allocate the argument list structure as a single block + * of memory, containing both pointers and the argument + * strings. We include some space for the program name + * because getopt expects it. + * We add an extra pointer to the end of the array, + * to simplify further parsing. + */ + for (i=0; i= 2 && !strcmp(av[1], "sysctl")) { + char *s; + int i; + + if (ac != 3) { + printf( "sysctl emulation usage:\n" + " ipfw sysctl name[=value]\n" + " ipfw sysctl -a\n"); + return 0; + } + s = index(av[2], '='); + if (s == NULL) { + s = !strcmp(av[2], "-a") ? NULL : av[2]; + sysctlbyname(s, NULL, NULL, NULL, 0); + } else { /* ipfw sysctl x.y.z=value */ + /* assume an INT value, will extend later */ + if (s[1] == '\0') { + printf("ipfw sysctl: missing value\n\n"); + return 0; + } + *s = '\0'; + i = strtol(s+1, NULL, 0); + sysctlbyname(av[2], NULL, NULL, &i, sizeof(int)); + } + return 0; + } +#endif + /* Save arguments for final freeing of memory.
*/ - save_ac = ac; save_av = av; optind = optreset = 1; /* restart getopt() */ @@ -232,7 +290,7 @@ ipfw_main(int oldac, char **oldav) break; case 'h': /* help */ - free_args(save_ac, save_av); + free(save_av); help(); break; /* NOTREACHED */ @@ -273,7 +331,7 @@ ipfw_main(int oldac, char **oldav) break; default: - free_args(save_ac, save_av); + free(save_av); return 1; } @@ -304,6 +362,10 @@ ipfw_main(int oldac, char **oldav) co.do_pipe = 1; else if (_substrcmp(*av, "queue") == 0) co.do_pipe = 2; + else if (_substrcmp(*av, "flowset") == 0) + co.do_pipe = 2; + else if (_substrcmp(*av, "sched") == 0) + co.do_pipe = 3; else if (!strncmp(*av, "set", strlen(*av))) { if (ac > 1 && isdigit(av[1][0])) { co.use_set = strtonum(av[1], 0, resvd_set_number, @@ -335,7 +397,7 @@ ipfw_main(int oldac, char **oldav) if (co.use_set == 0) { if (_substrcmp(*av, "add") == 0) - ipfw_add(ac, av); + ipfw_add(av); else if (co.do_nat && _substrcmp(*av, "show") == 0) ipfw_show_nat(ac, av); else if (co.do_pipe && _substrcmp(*av, "config") == 0) @@ -343,20 +405,20 @@ ipfw_main(int oldac, char **oldav) else if (co.do_nat && _substrcmp(*av, "config") == 0) ipfw_config_nat(ac, av); else if (_substrcmp(*av, "set") == 0) - ipfw_sets_handler(ac, av); + ipfw_sets_handler(av); else if (_substrcmp(*av, "table") == 0) ipfw_table_handler(ac, av); else if (_substrcmp(*av, "enable") == 0) - ipfw_sysctl_handler(ac, av, 1); + ipfw_sysctl_handler(av, 1); else if (_substrcmp(*av, "disable") == 0) - ipfw_sysctl_handler(ac, av, 0); + ipfw_sysctl_handler(av, 0); else try_next = 1; } if (co.use_set || try_next) { if (_substrcmp(*av, "delete") == 0) - ipfw_delete(ac, av); + ipfw_delete(av); else if (_substrcmp(*av, "flush") == 0) ipfw_flush(co.do_force); else if (_substrcmp(*av, "zero") == 0) @@ -373,7 +435,7 @@ ipfw_main(int oldac, char **oldav) } /* Free memory allocated in the argument parsing. 
*/ - free_args(save_ac, save_av); + free(save_av); return 0; } @@ -521,6 +583,20 @@ ipfw_readfile(int ac, char *av[]) int main(int ac, char *av[]) { +#if defined(_WIN32) && defined(TCC) + { + WSADATA wsaData; + int ret=0; + unsigned short wVersionRequested = MAKEWORD(2, 2); + ret = WSAStartup(wVersionRequested, &wsaData); + if (ret != 0) { + /* Tell the user that we could not find a usable */ + /* Winsock DLL. */ + printf("WSAStartup failed with error: %d\n", ret); + return 1; + } + } +#endif /* * If the last argument is an absolute pathname, interpret it * as a file to be preprocessed. -- cgit v1.1