diff options
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/ip_dummynet.h | 3 | ||||
-rw-r--r-- | sys/netinet/ip_fw.h | 2 | ||||
-rw-r--r-- | sys/netinet/ipfw/ip_dummynet.c | 18 | ||||
-rw-r--r-- | sys/netinet/ipfw/ip_fw2.c | 229 | ||||
-rw-r--r-- | sys/netinet/ipfw/ip_fw_log.c | 3 | ||||
-rw-r--r-- | sys/netinet/ipfw/ip_fw_pfil.c | 22 | ||||
-rw-r--r-- | sys/netinet/ipfw/ip_fw_private.h | 29 | ||||
-rw-r--r-- | sys/netinet/ipfw/ip_fw_sockopt.c | 478 |
8 files changed, 395 insertions, 389 deletions
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h index b5ef19e..9d5223f 100644 --- a/sys/netinet/ip_dummynet.h +++ b/sys/netinet/ip_dummynet.h @@ -112,7 +112,8 @@ struct dn_heap { * processing requirements. */ struct dn_pkt_tag { - struct ip_fw *rule; /* matching rule */ + uint32_t slot; /* slot of next rule to use */ + uint32_t rulenum; /* matching rule number */ uint32_t rule_id; /* matching rule id */ uint32_t chain_id; /* ruleset id */ int dn_dir; /* action when packet comes out. */ diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index 34540fd..21a79ec 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -461,7 +461,7 @@ typedef struct _ipfw_insn_icmp6 { */ struct ip_fw { - struct ip_fw *next; /* linked list of rules */ + struct ip_fw *x_next; /* linked list of rules */ struct ip_fw *next_rule; /* ptr to next [skipto] rule */ /* 'next_rule' is used to pass up 'set_disable' status */ diff --git a/sys/netinet/ipfw/ip_dummynet.c b/sys/netinet/ipfw/ip_dummynet.c index 6f8dc4d..a4f9668 100644 --- a/sys/netinet/ipfw/ip_dummynet.c +++ b/sys/netinet/ipfw/ip_dummynet.c @@ -1367,20 +1367,11 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) struct dn_pipe *pipe; uint64_t len = m->m_pkthdr.len; struct dn_flow_queue *q = NULL; - int is_pipe; - ipfw_insn *cmd = ACTION_PTR(fwa->rule); + int is_pipe = fwa->cookie & 0x8000000 ? 0 : 1; KASSERT(m->m_nextpkt == NULL, ("dummynet_io: mbuf queue passed to dummynet")); - if (cmd->opcode == O_LOG) - cmd += F_LEN(cmd); - if (cmd->opcode == O_ALTQ) - cmd += F_LEN(cmd); - if (cmd->opcode == O_TAG) - cmd += F_LEN(cmd); - is_pipe = (cmd->opcode == O_PIPE); - DUMMYNET_LOCK(); io_pkt++; /* @@ -1390,11 +1381,11 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) * below can be simplified. */ if (is_pipe) { - pipe = locate_pipe(fwa->cookie); + pipe = locate_pipe(fwa->cookie & 0xffff); if (pipe != NULL) fs = &(pipe->fs); } else - fs = locate_flowset(fwa->cookie); + fs = locate_flowset(fwa->cookie & 0xffff); if (fs == NULL) goto dropit; /* This queue/pipe does not exist! */ @@ -1440,7 +1431,8 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) * Ok, i can handle the pkt now... * Build and enqueue packet + parameters. */ - pkt->rule = fwa->rule; + pkt->slot = fwa->slot; + pkt->rulenum = fwa->rulenum; pkt->rule_id = fwa->rule_id; pkt->chain_id = fwa->chain_id; pkt->dn_dir = dir; diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c index ca998c1..b19ece4 100644 --- a/sys/netinet/ipfw/ip_fw2.c +++ b/sys/netinet/ipfw/ip_fw2.c @@ -628,31 +628,6 @@ send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip) args->m = NULL; } -/** - * Return the pointer to the skipto target. - * - * IMPORTANT: this should only be called on SKIPTO rules, and the - * jump target is taken from the 'rulenum' argument, which may come - * from the rule itself (direct skipto) or not (tablearg) - * - * The function never returns NULL: if the requested rule is not - * present, it returns the next rule in the chain. - * This also happens in case of a bogus argument > 65535 - */ -static struct ip_fw * -lookup_next_rule(struct ip_fw *me, uint32_t rulenum) -{ - struct ip_fw *rule; - - for (rule = me->next; rule ; rule = rule->next) { - if (rule->rulenum >= rulenum) - break; - } - if (rule == NULL) /* failure or not a skipto */ - rule = me->next ? me->next : me; - return rule; -} - /* * Support for uid/gid/jail lookup. These tests are expensive * (because we may need to look into the list of active sockets) @@ -741,11 +716,13 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, * Helper function to write the matching rule into args */ static inline void -set_match(struct ip_fw_args *args, struct ip_fw *f, struct ip_fw_chain *chain) +set_match(struct ip_fw_args *args, int slot, + struct ip_fw_chain *chain) { - args->rule = f; - args->rule_id = f->id; args->chain_id = chain->id; + args->slot = slot + 1; /* we use 0 as a marker */ + args->rule_id = chain->map[slot]->id; + args->rulenum = chain->map[slot]->rulenum; } /* @@ -839,7 +816,7 @@ ipfw_chk(struct ip_fw_args *args) */ struct ifnet *oif = args->oif; - struct ip_fw *f = NULL; /* matching rule */ + int f_pos = 0; /* index of current rule in the array */ int retval = 0; /* @@ -1168,31 +1145,21 @@ do { \ return (IP_FW_PASS); /* accept */ } mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); - if (args->rule) { + if (args->slot) { /* * Packet has already been tagged as a result of a previous * match on rule args->rule aka args->rule_id (PIPE, QUEUE, * REASS, NETGRAPH and similar, never a skipto). - * Validate the pointer and continue from args->rule->next - * if still present, otherwise use the default rule. - * XXX If fw_one_pass != 0 then just accept it, though - * the caller should never pass us such packets. + * Validate the slot and continue from the next one + * if still present, otherwise do a lookup. */ if (V_fw_one_pass) { IPFW_RUNLOCK(chain); return (IP_FW_PASS); } - if (chain->id == args->chain_id) { /* pointer still valid */ - f = args->rule->next; - } else { /* must revalidate the pointer */ - for (f = chain->rules; f != NULL; f = f->next) - if (f == args->rule && f->id == args->rule_id) { - f = args->rule->next; - break; - } - } - if (f == NULL) /* in case of errors, use default; */ - f = chain->default_rule; + f_pos = (args->chain_id == chain->id) ? + args->slot /* already incremented */ : + ipfw_find_rule(chain, args->rulenum, args->rule_id+1); } else { /* * Find the starting rule. It can be either the first @@ -1200,18 +1167,13 @@ do { \ */ int skipto = mtag ? divert_cookie(mtag) : 0; - f = chain->rules; + f_pos = 0; if (args->eh == NULL && skipto != 0) { if (skipto >= IPFW_DEFAULT_RULE) { IPFW_RUNLOCK(chain); return (IP_FW_DENY); /* invalid */ } - while (f && f->rulenum <= skipto) - f = f->next; - if (f == NULL) { /* drop packet */ - IPFW_RUNLOCK(chain); - return (IP_FW_DENY); - } + f_pos = ipfw_find_rule(chain, skipto, 0); } } /* reset divert rule to avoid confusion later */ @@ -1227,7 +1189,7 @@ do { \ * need to break out of one or both loops, or re-enter one of * the loops with updated variables. Loop variables are: * - * f (outer loop) points to the current rule. + * f_pos (outer loop) points to the current rule. * On output it points to the matching rule. * done (outer loop) is used as a flag to break the loop. * l (inner loop) residual length of current rule. @@ -1236,15 +1198,16 @@ do { \ * We break the inner loop by setting l=0 and possibly * cmdlen=0 if we don't want to advance cmd. * We break the outer loop by setting done=1 - * We can restart the inner loop by setting l>0 and f, cmd + * We can restart the inner loop by setting l>0 and f_pos, f, cmd * as needed. */ - for (; f; f = f->next) { + for (; f_pos < chain->n_rules; f_pos++) { ipfw_insn *cmd; uint32_t tablearg = 0; int l, cmdlen, skip_or; /* skip rest of OR block */ + struct ip_fw *f; -/* again: */ + f = chain->map[f_pos]; if (V_set_disable & (1 << f->set) ) continue; @@ -1864,8 +1827,8 @@ do { \ * Exceptions: * O_COUNT and O_SKIPTO actions: * instead of terminating, we jump to the next rule - * (setting l=0), or to the SKIPTO target (by - * setting f, cmd and l as needed), respectively. + * (setting l=0), or to the SKIPTO target (setting + * f/f_len, cmd and l as needed), respectively. * * O_TAG, O_LOG and O_ALTQ action parameters: * perform some action and set match = 1; @@ -1926,7 +1889,14 @@ do { \ */ q->pcnt++; q->bcnt += pktlen; + /* XXX we would like to have f_pos + * readily accessible in the dynamic + * rule, instead of having to + * lookup q->rule. + */ f = q->rule; + f_pos = ipfw_find_rule(chain, + f->rulenum, f->id); cmd = ACTION_PTR(f); l = f->cmd_len - f->act_ofs; ipfw_dyn_unlock(); @@ -1952,9 +1922,11 @@ do { \ case O_PIPE: case O_QUEUE: - set_match(args, f, chain); + set_match(args, f_pos, chain); args->cookie = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg : cmd->arg1; + if (cmd->opcode == O_QUEUE) + args->cookie |= 0x80000000; retval = IP_FW_DUMMYNET; l = 0; /* exit inner loop */ done = 1; /* exit outer loop */ @@ -1987,38 +1959,53 @@ do { \ break; case O_COUNT: - case O_SKIPTO: f->pcnt++; /* update stats */ f->bcnt += pktlen; f->timestamp = time_uptime; - if (cmd->opcode == O_COUNT) { l = 0; /* exit inner loop */ break; - } - /* skipto: */ - if (cmd->arg1 == IP_FW_TABLEARG) { - f = lookup_next_rule(f, tablearg); - } else { /* direct skipto */ - /* update f->next_rule if not set */ - if (f->next_rule == NULL) + + case O_SKIPTO: + f->pcnt++; /* update stats */ + f->bcnt += pktlen; + f->timestamp = time_uptime; + /* If possible use cached f_pos (in f->next_rule), + * whose version is written in f->next_rule + * (horrible hacks to avoid changing the ABI). + */ + if (cmd->arg1 != IP_FW_TABLEARG && + (uint32_t)f->x_next == chain->id) { + f_pos = (uint32_t)f->next_rule; + } else { + int i = (cmd->arg1 == IP_FW_TABLEARG) ? + tablearg : cmd->arg1; + /* make sure we do not jump backward */ + if (i <= f->rulenum) + i = f->rulenum + 1; + f_pos = ipfw_find_rule(chain, i, 0); + /* update the cache */ + if (cmd->arg1 != IP_FW_TABLEARG) { f->next_rule = - lookup_next_rule(f, cmd->arg1); - f = f->next_rule; + (void *)(uintptr_t)f_pos; + f->x_next = + (void *)(uintptr_t)chain->id; + } } /* - * Skip disabled rules, and - * re-enter the inner loop - * with the correct f, l and cmd. + * Skip disabled rules, and re-enter + * the inner loop with the correct + * f_pos, f, l and cmd. * Also clear cmdlen and skip_or */ - while (f && (V_set_disable & (1 << f->set))) - f = f->next; - if (f) { /* found a valid rule */ + for (; f_pos < chain->n_rules - 1 && + (V_set_disable & + (1 << chain->map[f_pos]->set)); + f_pos++) + ; + /* prepare to enter the inner loop */ + f = chain->map[f_pos]; l = f->cmd_len; cmd = f->cmd; - } else { /* should not happen */ - l = 0; /* exit inner loop */ - } match = 1; cmdlen = 0; skip_or = 0; @@ -2083,7 +2070,7 @@ do { \ case O_NETGRAPH: case O_NGTEE: - set_match(args, f, chain); + set_match(args, f_pos, chain); args->cookie = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg : cmd->arg1; retval = (cmd->opcode == O_NETGRAPH) ? @@ -2108,7 +2095,7 @@ do { \ struct cfg_nat *t; int nat_id; - set_match(args, f, chain); + set_match(args, f_pos, chain); t = ((ipfw_insn_nat *)cmd)->nat; if (t == NULL) { nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? @@ -2175,7 +2162,7 @@ do { \ else ip->ip_sum = in_cksum(m, hlen); retval = IP_FW_REASS; - set_match(args, f, chain); + set_match(args, f_pos, chain); } done = 1; /* exit outer loop */ break; @@ -2209,10 +2196,11 @@ do { \ } /* end of outer for, scan rules */ if (done) { + struct ip_fw *rule = chain->map[f_pos]; /* Update statistics */ - f->pcnt++; - f->bcnt += pktlen; - f->timestamp = time_uptime; + rule->pcnt++; + rule->bcnt += pktlen; + rule->timestamp = time_uptime; } else { retval = IP_FW_DENY; printf("ipfw: ouch!, skip past end of rules, denying packet\n"); @@ -2308,7 +2296,7 @@ static int vnet_ipfw_init(const void *unused) { int error; - struct ip_fw default_rule; + struct ip_fw *rule = NULL; struct ip_fw_chain *chain; chain = &V_layer3_chain; @@ -2322,38 +2310,39 @@ vnet_ipfw_init(const void *unused) #ifdef IPFIREWALL_VERBOSE_LIMIT V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; #endif +#ifdef IPFIREWALL_NAT + LIST_INIT(&chain->nat); +#endif + /* insert the default rule and create the initial map */ + chain->n_rules = 1; + chain->static_len = sizeof(struct ip_fw); + chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO); + if (chain->map) + rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO); + if (rule == NULL) { + if (chain->map) + free(chain->map, M_IPFW); + printf("ipfw2: ENOSPC initializing default rule " + "(support disabled)\n"); + return (ENOSPC); + } error = ipfw_init_tables(chain); if (error) { panic("init_tables"); /* XXX Marko fix this ! */ } -#ifdef IPFIREWALL_NAT - LIST_INIT(&chain->nat); -#endif + /* fill and insert the default rule */ + rule->act_ofs = 0; + rule->rulenum = IPFW_DEFAULT_RULE; + rule->cmd_len = 1; + rule->set = RESVD_SET; + rule->cmd[0].len = 1; + rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; + chain->rules = chain->default_rule = chain->map[0] = rule; + chain->id = rule->id = 1; - chain->rules = NULL; IPFW_LOCK_INIT(chain); - - bzero(&default_rule, sizeof default_rule); - default_rule.act_ofs = 0; - default_rule.rulenum = IPFW_DEFAULT_RULE; - default_rule.cmd_len = 1; - default_rule.set = RESVD_SET; - default_rule.cmd[0].len = 1; - default_rule.cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; - error = ipfw_add_rule(chain, &default_rule); - - if (error != 0) { - printf("ipfw2: error %u initializing default rule " - "(support disabled)\n", error); - IPFW_LOCK_DESTROY(chain); - printf("leaving ipfw_iattach (1) with error %d\n", error); - return (error); - } - - chain->default_rule = chain->rules; - ipfw_dyn_init(); /* First set up some values that are compile time options */ @@ -2385,8 +2374,9 @@ vnet_ipfw_init(const void *unused) static int vnet_ipfw_uninit(const void *unused) { - struct ip_fw *reap; + struct ip_fw *reap, *rule; struct ip_fw_chain *chain = &V_layer3_chain; + int i; V_ipfw_vnet_ready = 0; /* tell new callers to go away */ /* @@ -2400,19 +2390,26 @@ vnet_ipfw_uninit(const void *unused) #endif V_ip_fw_chk_ptr = NULL; V_ip_fw_ctl_ptr = NULL; + IPFW_UH_WLOCK(chain); + IPFW_UH_WUNLOCK(chain); + IPFW_UH_WLOCK(chain); IPFW_WLOCK(chain); - /* We wait on the wlock here until the last user leaves */ IPFW_WUNLOCK(chain); IPFW_WLOCK(chain); ipfw_dyn_uninit(0); /* run the callout_drain */ ipfw_flush_tables(chain); - chain->reap = NULL; - ipfw_free_chain(chain, 1 /* kill default rule */); - reap = chain->reap; - chain->reap = NULL; + reap = NULL; + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; + rule->x_next = reap; + reap = rule; + } + if (chain->map) + free(chain->map, M_IPFW); IPFW_WUNLOCK(chain); + IPFW_UH_WUNLOCK(chain); if (reap != NULL) ipfw_reap_rules(reap); IPFW_LOCK_DESTROY(chain); diff --git a/sys/netinet/ipfw/ip_fw_log.c b/sys/netinet/ipfw/ip_fw_log.c index cd404d4..e3515af 100644 --- a/sys/netinet/ipfw/ip_fw_log.c +++ b/sys/netinet/ipfw/ip_fw_log.c @@ -26,9 +26,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#define DEB(x) -#define DDB(x) x - /* * Logging support for ipfw */ diff --git a/sys/netinet/ipfw/ip_fw_pfil.c b/sys/netinet/ipfw/ip_fw_pfil.c index e1dcf16..35ee157 100644 --- a/sys/netinet/ipfw/ip_fw_pfil.c +++ b/sys/netinet/ipfw/ip_fw_pfil.c @@ -125,7 +125,8 @@ ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, if (ng_tag != NULL) { KASSERT(ng_tag->dir == NG_IPFW_IN, ("ng_ipfw tag with wrong direction")); - args.rule = ng_tag->rule; + args.slot = ng_tag->slot; + args.rulenum = ng_tag->rulenum; args.rule_id = ng_tag->rule_id; args.chain_id = ng_tag->chain_id; m_tag_delete(*m0, (struct m_tag *)ng_tag); @@ -137,10 +138,10 @@ again: struct dn_pkt_tag *dt; dt = (struct dn_pkt_tag *)(dn_tag+1); - args.rule = dt->rule; + args.slot = dt->slot; + args.rulenum = dt->rulenum; args.rule_id = dt->rule_id; args.chain_id = dt->chain_id; - m_tag_delete(*m0, dn_tag); } @@ -148,7 +149,7 @@ again: args.inp = inp; tee = 0; - if (V_fw_one_pass == 0 || args.rule == NULL) { + if (V_fw_one_pass == 0 || args.slot == 0) { ipfw = ipfw_chk(&args); *m0 = args.m; } else @@ -201,7 +202,7 @@ again: *m0 = NULL; return 0; /* packet consumed */ } else { - args.rule = NULL; + args.slot = 0; goto again; /* continue with packet */ } @@ -258,7 +259,8 @@ ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, if (ng_tag != NULL) { KASSERT(ng_tag->dir == NG_IPFW_OUT, ("ng_ipfw tag with wrong direction")); - args.rule = ng_tag->rule; + args.slot = ng_tag->slot; + args.rulenum = ng_tag->rulenum; args.rule_id = ng_tag->rule_id; args.chain_id = ng_tag->chain_id; m_tag_delete(*m0, (struct m_tag *)ng_tag); @@ -270,10 +272,10 @@ again: struct dn_pkt_tag *dt; dt = (struct dn_pkt_tag *)(dn_tag+1); - args.rule = dt->rule; + args.slot = dt->slot; + args.rulenum = dt->rulenum; args.rule_id = dt->rule_id; args.chain_id = dt->chain_id; - m_tag_delete(*m0, dn_tag); } @@ -282,7 +284,7 @@ again: args.inp = inp; tee = 0; - if (V_fw_one_pass == 0 || args.rule == NULL) { + if (V_fw_one_pass == 0 || args.slot == 0) { ipfw = ipfw_chk(&args); *m0 = args.m; } else @@ -341,7 +343,7 @@ again: *m0 = NULL; return 0; /* packet consumed */ } else { - args.rule = NULL; + args.slot = 0; goto again; /* continue with packet */ } diff --git a/sys/netinet/ipfw/ip_fw_private.h b/sys/netinet/ipfw/ip_fw_private.h index 9c67d20..da5db8e 100644 --- a/sys/netinet/ipfw/ip_fw_private.h +++ b/sys/netinet/ipfw/ip_fw_private.h @@ -79,7 +79,12 @@ struct ip_fw_args { struct ifnet *oif; /* output interface */ struct sockaddr_in *next_hop; /* forward address */ - struct ip_fw *rule; /* matching rule */ + /* chain_id validates 'slot', the location of the pointer to + * a matching rule. + * If invalid, we can lookup the rule using rule_id and rulenum + */ + uint32_t slot; /* slot for matching rule */ + uint32_t rulenum; /* matching rule number */ uint32_t rule_id; /* matching rule id */ uint32_t chain_id; /* ruleset id */ @@ -178,9 +183,11 @@ struct ip_fw_chain { struct ip_fw *default_rule; int n_rules; /* number of static rules */ int static_len; /* total len of static rules */ + struct ip_fw **map; /* array of rule ptrs to ease lookup */ LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ struct radix_node_head *tables[IPFW_TABLES_MAX]; struct rwlock rwmtx; + struct rwlock uh_lock; /* lock for upper half */ uint32_t id; /* ruleset id */ }; @@ -191,9 +198,16 @@ struct sockopt; /* used by tcp_var.h */ * so the variable and the macros must be here. */ -#define IPFW_LOCK_INIT(_chain) \ - rw_init(&(_chain)->rwmtx, "IPFW static rules") -#define IPFW_LOCK_DESTROY(_chain) rw_destroy(&(_chain)->rwmtx) +#define IPFW_LOCK_INIT(_chain) do { \ + rw_init(&(_chain)->rwmtx, "IPFW static rules"); \ + rw_init(&(_chain)->uh_lock, "IPFW UH lock"); \ + } while (0) + +#define IPFW_LOCK_DESTROY(_chain) do { \ + rw_destroy(&(_chain)->rwmtx); \ + rw_destroy(&(_chain)->uh_lock); \ + } while (0) + #define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) @@ -201,12 +215,17 @@ struct sockopt; /* used by tcp_var.h */ #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) +#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock) +#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock) +#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock) +#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock) + /* In ip_fw_sockopt.c */ +int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id); int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule); int ipfw_ctl(struct sockopt *sopt); int ipfw_chk(struct ip_fw_args *args); void ipfw_reap_rules(struct ip_fw *head); -void ipfw_free_chain(struct ip_fw_chain *chain, int kill_default); /* In ip_fw_table.c */ struct radix_node; diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c index 49618da..061c3f9 100644 --- a/sys/netinet/ipfw/ip_fw_sockopt.c +++ b/sys/netinet/ipfw/ip_fw_sockopt.c @@ -1,5 +1,7 @@ /*- - * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa + * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa + * + * Supported by: Valeria Paoli * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,9 +28,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#define DEB(x) -#define DDB(x) x - /* * Sockopt support for ipfw. The routines here implement * the upper half of the ipfw code. @@ -81,136 +80,143 @@ MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); */ /* - * When a rule is added/deleted, clear the next_rule pointers in all rules. - * These will be reconstructed on the fly as packets are matched. + * Find the smallest rule >= key, id. + * We could use bsearch but it is so simple that we code it directly */ -static void -flush_rule_ptrs(struct ip_fw_chain *chain) +int +ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id) { - struct ip_fw *rule; + int i, lo, hi; + struct ip_fw *r; + + for (lo = 0, hi = chain->n_rules - 1; lo < hi;) { + i = (lo + hi) / 2; + r = chain->map[i]; + if (r->rulenum < key) + lo = i + 1; /* continue from the next one */ + else if (r->rulenum > key) + hi = i; /* this might be good */ + else if (r->id < id) + lo = i + 1; /* continue from the next one */ + else /* r->id >= id */ + hi = i; /* this might be good */ + }; + return hi; +} - IPFW_WLOCK_ASSERT(chain); +/* + * allocate a new map, returns the chain locked. extra is the number + * of entries to add or delete. + */ +static struct ip_fw ** +get_map(struct ip_fw_chain *chain, int extra, int locked) +{ - chain->id++; + for (;;) { + struct ip_fw **map; + int i; - for (rule = chain->rules; rule; rule = rule->next) - rule->next_rule = NULL; + i = chain->n_rules + extra; + map = malloc(i * sizeof(struct ip_fw *), M_IPFW, M_WAITOK); + if (map == NULL) { + printf("%s: cannot allocate map\n", __FUNCTION__); + return NULL; + } + if (!locked) + IPFW_UH_WLOCK(chain); + if (i >= chain->n_rules + extra) /* good */ + return map; + /* otherwise we lost the race, free and retry */ + if (!locked) + IPFW_UH_WUNLOCK(chain); + free(map, M_IPFW); + } +} + +/* + * swap the maps. It is supposed to be called with IPFW_UH_WLOCK + */ +static struct ip_fw ** +swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len) +{ + struct ip_fw **old_map; + + IPFW_WLOCK(chain); + chain->id++; + chain->n_rules = new_len; + old_map = chain->map; + chain->map = new_map; + IPFW_WUNLOCK(chain); + return old_map; } /* * Add a new rule to the list. Copy the rule into a malloc'ed area, then * possibly create a rule number and add the rule to the list. * Update the rule_number in the input struct so the caller knows it as well. + * XXX DO NOT USE FOR THE DEFAULT RULE. + * Must be called without IPFW_UH held */ int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) { - struct ip_fw *rule, *f, *prev; - int l = RULESIZE(input_rule); + struct ip_fw *rule; + int i, l, insert_before; + struct ip_fw **map; /* the new array of pointers */ - if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE) + if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1) return (EINVAL); - rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO); + l = RULESIZE(input_rule); + rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO); if (rule == NULL) return (ENOSPC); + /* get_map returns with IPFW_UH_WLOCK if successful */ + map = get_map(chain, 1, 0 /* not locked */); + if (map == NULL) { + free(rule, M_IPFW); + return ENOSPC; + } bcopy(input_rule, rule, l); - - rule->next = NULL; + /* clear fields not settable from userland */ + rule->x_next = NULL; rule->next_rule = NULL; - rule->pcnt = 0; rule->bcnt = 0; rule->timestamp = 0; - IPFW_WLOCK(chain); - - if (chain->rules == NULL) { /* default rule */ - chain->rules = rule; - rule->id = ++chain->id; - goto done; - } - - /* - * If rulenum is 0, find highest numbered rule before the - * default rule, and add autoinc_step - */ if (V_autoinc_step < 1) V_autoinc_step = 1; else if (V_autoinc_step > 1000) V_autoinc_step = 1000; + /* find the insertion point, we will insert before */ + insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE; + i = ipfw_find_rule(chain, insert_before, 0); + /* duplicate first part */ + if (i > 0) + bcopy(chain->map, map, i * sizeof(struct ip_fw *)); + map[i] = rule; + /* duplicate remaining part, we always have the default rule */ + bcopy(chain->map + i, map + i + 1, + sizeof(struct ip_fw *) *(chain->n_rules - i)); if (rule->rulenum == 0) { - /* - * locate the highest numbered rule before default - */ - for (f = chain->rules; f; f = f->next) { - if (f->rulenum == IPFW_DEFAULT_RULE) - break; - rule->rulenum = f->rulenum; - } + /* write back the number */ + rule->rulenum = i > 0 ? map[i-1]->rulenum : 0; if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) rule->rulenum += V_autoinc_step; input_rule->rulenum = rule->rulenum; } - /* - * Now insert the new rule in the right place in the sorted list. - */ - for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) { - if (f->rulenum > rule->rulenum) { /* found the location */ - if (prev) { - rule->next = f; - prev->next = rule; - } else { /* head insert */ - rule->next = chain->rules; - chain->rules = rule; - } - break; - } - } - flush_rule_ptrs(chain); - /* chain->id incremented inside flush_rule_ptrs() */ - rule->id = chain->id; -done: - chain->n_rules++; + rule->id = chain->id + 1; + map = swap_map(chain, map, chain->n_rules + 1); chain->static_len += l; - IPFW_WUNLOCK(chain); + IPFW_UH_WUNLOCK(chain); + if (map) + free(map, M_IPFW); return (0); } -/** - * Remove a static rule (including derived * dynamic rules) - * and place it on the ``reap list'' for later reclamation. - * The caller is in charge of clearing rule pointers to avoid - * dangling pointers. - * @return a pointer to the next entry. - * Arguments are not checked, so they better be correct. - */ -static struct ip_fw * -remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, - struct ip_fw *prev) -{ - struct ip_fw *n; - int l = RULESIZE(rule); - - IPFW_WLOCK_ASSERT(chain); - - n = rule->next; - ipfw_remove_dyn_children(rule); - if (prev == NULL) - chain->rules = n; - else - prev->next = n; - chain->n_rules--; - chain->static_len -= l; - - rule->next = chain->reap; - chain->reap = rule; - - return n; -} - /* * Reclaim storage associated with a list of rules. This is * typically the list created using remove_rule. @@ -222,34 +228,11 @@ ipfw_reap_rules(struct ip_fw *head) struct ip_fw *rule; while ((rule = head) != NULL) { - head = head->next; + head = head->x_next; free(rule, M_IPFW); } } -/* - * Remove all rules from a chain (except rules in set RESVD_SET - * unless kill_default = 1). The caller is responsible for - * reclaiming storage for the rules left in chain->reap. - */ -void -ipfw_free_chain(struct ip_fw_chain *chain, int kill_default) -{ - struct ip_fw *prev, *rule; - - IPFW_WLOCK_ASSERT(chain); - - chain->reap = NULL; - flush_rule_ptrs(chain); /* more efficient to do outside the loop */ - for (prev = NULL, rule = chain->rules; rule ; ) - if (kill_default || rule->set != RESVD_SET) - rule = remove_rule(chain, rule, prev); - else { - prev = rule; - rule = rule->next; - } -} - /** * Remove all rules with given number, and also do set manipulation. * Assumes chain != NULL && *chain != NULL. @@ -267,9 +250,12 @@ ipfw_free_chain(struct ip_fw_chain *chain, int kill_default) static int del_entry(struct ip_fw_chain *chain, u_int32_t arg) { - struct ip_fw *prev = NULL, *rule; - u_int16_t rulenum; /* rule or old_set */ - u_int8_t cmd, new_set; + struct ip_fw *rule; + uint32_t rulenum; /* rule or old_set */ + uint8_t cmd, new_set; + int start, end = 0, i, ofs, n; + struct ip_fw **map = NULL; + int error = 0; rulenum = arg & 0xffff; cmd = (arg >> 24) & 0xff; @@ -285,96 +271,122 @@ del_entry(struct ip_fw_chain *chain, u_int32_t arg) return EINVAL; } - IPFW_WLOCK(chain); - rule = chain->rules; /* common starting point */ + IPFW_UH_WLOCK(chain); /* prevent conflicts among the writers */ chain->reap = NULL; /* prepare for deletions */ + switch (cmd) { - case 0: /* delete rules with given number */ - /* - * locate first rule to delete + case 0: /* delete rules with given number (0 is special means all) */ + case 1: /* delete all rules with given set number, rule->set == rulenum */ + case 5: /* delete rules with given number and with given set number. + * rulenum - given rule number; + * new_set - given set number. */ - for (; rule->rulenum < rulenum; prev = rule, rule = rule->next) - ; - if (rule->rulenum != rulenum) { - IPFW_WUNLOCK(chain); - return EINVAL; - } - - /* - * flush pointers outside the loop, then delete all matching - * rules. prev remains the same throughout the cycle. + /* locate first rule to delete (start), the one after the + * last one (end), and count how many rules to delete (n) */ - flush_rule_ptrs(chain); - while (rule->rulenum == rulenum) - rule = remove_rule(chain, rule, prev); + n = 0; + if (cmd == 1) { /* look for a specific set, must scan all */ + for (start = -1, i = 0; i < chain->n_rules; i++) { + if (chain->map[start]->set != rulenum) + continue; + if (start < 0) + start = i; + end = i; + n++; + } + end++; /* first non-matching */ + } else { + start = ipfw_find_rule(chain, rulenum, 0); + for (end = start; end < chain->n_rules; end++) { + rule = chain->map[end]; + if (rulenum > 0 && rule->rulenum != rulenum) + break; + if (rule->set != RESVD_SET && + (cmd == 0 || rule->set == new_set) ) + n++; + } + } + /* allocate the map, if needed */ + if (n > 0) + map = get_map(chain, -n, 1 /* locked */); + if (n == 0 || map == NULL) { + error = EINVAL; break; - - case 1: /* delete all rules with given set number */ - flush_rule_ptrs(chain); - while (rule->rulenum < IPFW_DEFAULT_RULE) { - if (rule->set == rulenum) - rule = remove_rule(chain, rule, prev); - else { - prev = rule; - rule = rule->next; + } + /* copy the initial part of the map */ + if (start > 0) + bcopy(chain->map, map, start * sizeof(struct ip_fw *)); + /* copy active rules between start and end */ + for (i = ofs = start; i < end; i++) { + rule = chain->map[i]; + if (!(rule->set != RESVD_SET && + (cmd == 0 || rule->set == new_set) )) + map[ofs++] = chain->map[i]; + } + /* finally the tail */ + bcopy(chain->map + end, map + ofs, + (chain->n_rules - end) * sizeof(struct ip_fw *)); + map = swap_map(chain, map, chain->n_rules - n); + /* now remove the rules deleted */ + for (i = start; i < end; i++) { + rule = map[i]; + if (rule->set != RESVD_SET && + (cmd == 0 || rule->set == new_set) ) { + int l = RULESIZE(rule); + + chain->static_len -= l; + ipfw_remove_dyn_children(rule); + rule->x_next = chain->reap; + chain->reap = rule; } } break; case 2: /* move rules with given number to new set */ - for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + IPFW_UH_WLOCK(chain); + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; if (rule->rulenum == rulenum) rule->set = new_set; + } + IPFW_UH_WUNLOCK(chain); break; case 3: /* move rules with given set number to new set */ - for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + IPFW_UH_WLOCK(chain); + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; if (rule->set == rulenum) rule->set = new_set; + } + IPFW_UH_WUNLOCK(chain); break; case 4: /* swap two sets */ - for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + IPFW_UH_WLOCK(chain); + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; if (rule->set == rulenum) rule->set = new_set; else if (rule->set == new_set) rule->set = rulenum; - break; - - case 5: /* delete rules with given number and with given set number. - * rulenum - given rule number; - * new_set - given set number. - */ - for (; rule->rulenum < rulenum; prev = rule, rule = rule->next) - ; - if (rule->rulenum != rulenum) { - IPFW_WUNLOCK(chain); - return (EINVAL); - } - flush_rule_ptrs(chain); - while (rule->rulenum == rulenum) { - if (rule->set == new_set) - rule = remove_rule(chain, rule, prev); - else { - prev = rule; - rule = rule->next; - } } + IPFW_UH_WUNLOCK(chain); + break; } - /* - * Look for rules to reclaim. We grab the list before - * releasing the lock then reclaim them w/o the lock to - * avoid a LOR with dummynet. - */ rule = chain->reap; - IPFW_WUNLOCK(chain); + chain->reap = NULL; + IPFW_UH_WUNLOCK(chain); ipfw_reap_rules(rule); - return 0; + if (map) + free(map, M_IPFW); + return error; } /* * Clear counters for a specific rule. - * The enclosing "table" is assumed locked. + * Normally run under IPFW_UH_RLOCK, but these are idempotent ops + * so we only care that rules do not disappear. */ static void clear_counters(struct ip_fw *rule, int log_only) @@ -403,6 +415,7 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) { struct ip_fw *rule; char *msg; + int i; uint16_t rulenum = arg & 0xffff; uint8_t set = (arg >> 16) & 0xff; @@ -413,11 +426,12 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) if (cmd == 1 && set > RESVD_SET) return (EINVAL); - IPFW_WLOCK(chain); + IPFW_UH_RLOCK(chain); if (rulenum == 0) { V_norule_counter = 0; - for (rule = chain->rules; rule; rule = rule->next) { - /* Skip rules from another set. */ + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; + /* Skip rules not in our set. */ if (cmd == 1 && rule->set != set) continue; clear_counters(rule, log_only); @@ -426,18 +440,14 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) "Accounting cleared"; } else { int cleared = 0; - /* - * We can have multiple rules with the same number, so we - * need to clear them all. - */ - for (rule = chain->rules; rule; rule = rule->next) + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; if (rule->rulenum == rulenum) { - while (rule && rule->rulenum == rulenum) { if (cmd == 0 || rule->set == set) clear_counters(rule, log_only); - rule = rule->next; - } cleared = 1; + } + if (rule->rulenum > rulenum) break; } if (!cleared) { /* we did not find any matching rules */ @@ -446,7 +456,7 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) } msg = log_only ? "logging count reset" : "cleared"; } - IPFW_WUNLOCK(chain); + IPFW_UH_RUNLOCK(chain); if (V_fw_verbose) { int lev = LOG_SECURITY | LOG_NOTICE; @@ -497,7 +507,6 @@ check_ipfw_struct(struct ip_fw *rule, int size) cmd->opcode); return EINVAL; } - DEB(printf("ipfw: opcode %d\n", cmd->opcode);) switch (cmd->opcode) { case O_PROBE_STATE: case O_KEEP_STATE: @@ -771,43 +780,37 @@ bad_size: /* * Copy the static and dynamic rules to the supplied buffer * and return the amount of space actually used. + * Must be run under IPFW_UH_RLOCK */ static size_t ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) { char *bp = buf; char *ep = bp + space; - struct ip_fw *rule; - int i; + struct ip_fw *rule, *dst; + int l, i; time_t boot_seconds; boot_seconds = boottime.tv_sec; - /* XXX this can take a long time and locking will block packet flow */ - IPFW_RLOCK(chain); - for (rule = chain->rules; rule ; rule = rule->next) { - /* - * Verify the entry fits in the buffer in case the - * rules changed between calculating buffer space and - * now. This would be better done using a generation - * number but should suffice for now. - */ - i = RULESIZE(rule); - if (bp + i <= ep) { - bcopy(rule, bp, i); + for (i = 0; i < chain->n_rules; i++) { + rule = chain->map[i]; + l = RULESIZE(rule); + if (bp + l > ep) { /* should not happen */ + printf("overflow dumping static rules\n"); + break; + } + dst = (struct ip_fw *)bp; + bcopy(rule, dst, l); /* * XXX HACK. Store the disable mask in the "next" * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? */ - bcopy(&V_set_disable, - &(((struct ip_fw *)bp)->next_rule), - sizeof(V_set_disable)); - if (((struct ip_fw *)bp)->timestamp) - ((struct ip_fw *)bp)->timestamp += boot_seconds; - bp += i; + bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable)); + if (dst->timestamp) + dst->timestamp += boot_seconds; + bp += l; } - } - IPFW_RUNLOCK(chain); ipfw_get_dynamic(&bp, ep); /* protected by the dynamic lock */ return (bp - (char *)buf); } @@ -857,41 +860,33 @@ ipfw_ctl(struct sockopt *sopt) * change between calculating the size and returning the * data in which case we'll just return what fits. */ - size = chain->static_len; /* size of static rules */ - size += ipfw_dyn_len(); + for (;;) { + int len = 0, want; + size = chain->static_len; + size += ipfw_dyn_len(); if (size >= sopt->sopt_valsize) break; - /* - * XXX todo: if the user passes a short length just to know - * how much room is needed, do not bother filling up the - * buffer, just jump to the sooptcopyout. - */ buf = malloc(size, M_TEMP, M_WAITOK); - error = sooptcopyout(sopt, buf, - ipfw_getrules(chain, buf, size)); + if (buf == NULL) + break; + IPFW_UH_RLOCK(chain); + /* check again how much space we need */ + want = chain->static_len + ipfw_dyn_len(); + if (size >= want) + len = ipfw_getrules(chain, buf, size); + IPFW_UH_RUNLOCK(chain); + if (size >= want) + error = sooptcopyout(sopt, buf, len); free(buf, M_TEMP); + if (size >= want) + break; + } break; case IP_FW_FLUSH: - /* - * Normally we cannot release the lock on each iteration. - * We could do it here only because we start from the head all - * the times so there is no risk of missing some entries. - * On the other hand, the risk is that we end up with - * a very inconsistent ruleset, so better keep the lock - * around the whole cycle. - * - * XXX this code can be improved by resetting the head of - * the list to point to the default rule, and then freeing - * the old list without the need for a lock. - */ - - IPFW_WLOCK(chain); - ipfw_free_chain(chain, 0 /* keep default rule */); - rule = chain->reap; - IPFW_WUNLOCK(chain); - ipfw_reap_rules(rule); + /* locking is done within del_entry() */ + error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */ break; case IP_FW_ADD: @@ -901,6 +896,7 @@ ipfw_ctl(struct sockopt *sopt) if (error == 0) error = check_ipfw_struct(rule, sopt->sopt_valsize); if (error == 0) { + /* locking is done within ipfw_add_rule() */ error = ipfw_add_rule(chain, rule); size = RULESIZE(rule); if (!error && sopt->sopt_dir == SOPT_GET) @@ -931,9 +927,11 @@ ipfw_ctl(struct sockopt *sopt) /* delete or reassign, locking done in del_entry() */ error = del_entry(chain, rulenum[0]); } else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */ + IPFW_UH_WLOCK(chain); V_set_disable = (V_set_disable | rulenum[0]) & ~rulenum[1] & ~(1<<RESVD_SET); /* set RESVD_SET always enabled */ + IPFW_UH_WUNLOCK(chain); } else error = EINVAL; break; |