8 files changed, 395 insertions, 389 deletions
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h
index b5ef19e..9d5223f 100644
--- a/sys/netinet/ip_dummynet.h
+++ b/sys/netinet/ip_dummynet.h
@@ -112,7 +112,8 @@ struct dn_heap {
  * processing requirements.
  */
 struct dn_pkt_tag {
-    struct ip_fw *rule;		/* matching rule */
+    uint32_t slot;		/* slot of next rule to use */
+    uint32_t rulenum;		/* matching rule number */
     uint32_t rule_id;		/* matching rule id */
     uint32_t chain_id;		/* ruleset id */
     int dn_dir;			/* action when packet comes out. */
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
index 34540fd..21a79ec 100644
--- a/sys/netinet/ip_fw.h
+++ b/sys/netinet/ip_fw.h
@@ -461,7 +461,7 @@ typedef struct _ipfw_insn_icmp6 {
  */
 
 struct ip_fw {
-	struct ip_fw	*next;		/* linked list of rules		*/
+	struct ip_fw	*x_next;	/* linked list of rules		*/
 	struct ip_fw	*next_rule;	/* ptr to next [skipto] rule	*/
 	/* 'next_rule' is used to pass up 'set_disable' status		*/
 
diff --git a/sys/netinet/ipfw/ip_dummynet.c b/sys/netinet/ipfw/ip_dummynet.c
index 6f8dc4d..a4f9668 100644
--- a/sys/netinet/ipfw/ip_dummynet.c
+++ b/sys/netinet/ipfw/ip_dummynet.c
@@ -1367,20 +1367,11 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
 	struct dn_pipe *pipe;
 	uint64_t len = m->m_pkthdr.len;
 	struct dn_flow_queue *q = NULL;
-	int is_pipe;
-	ipfw_insn *cmd = ACTION_PTR(fwa->rule);
+	int is_pipe = fwa->cookie & 0x8000000 ? 0 : 1;
 
 	KASSERT(m->m_nextpkt == NULL,
 	    ("dummynet_io: mbuf queue passed to dummynet"));
 
-	if (cmd->opcode == O_LOG)
-		cmd += F_LEN(cmd);
-	if (cmd->opcode == O_ALTQ)
-		cmd += F_LEN(cmd);
-	if (cmd->opcode == O_TAG)
-		cmd += F_LEN(cmd);
-	is_pipe = (cmd->opcode == O_PIPE);
-
 	DUMMYNET_LOCK();
 	io_pkt++;
 	/*
@@ -1390,11 +1381,11 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
 	 * below can be simplified.
 	 */
 	if (is_pipe) {
-		pipe = locate_pipe(fwa->cookie);
+		pipe = locate_pipe(fwa->cookie & 0xffff);
 		if (pipe != NULL)
 			fs = &(pipe->fs);
 	} else
-		fs = locate_flowset(fwa->cookie);
+		fs = locate_flowset(fwa->cookie & 0xffff);
 
 	if (fs == NULL)
 		goto dropit;	/* This queue/pipe does not exist! */
@@ -1440,7 +1431,8 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
 	 * Ok, i can handle the pkt now...
 	 * Build and enqueue packet + parameters.
 	 */
-	pkt->rule = fwa->rule;
+	pkt->slot = fwa->slot;
+	pkt->rulenum = fwa->rulenum;
 	pkt->rule_id = fwa->rule_id;
 	pkt->chain_id = fwa->chain_id;
 	pkt->dn_dir = dir;
diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c
index ca998c1..b19ece4 100644
--- a/sys/netinet/ipfw/ip_fw2.c
+++ b/sys/netinet/ipfw/ip_fw2.c
@@ -628,31 +628,6 @@ send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip)
 	args->m = NULL;
 }
 
-/**
- * Return the pointer to the skipto target.
- *
- * IMPORTANT: this should only be called on SKIPTO rules, and the
- * jump target is taken from the 'rulenum' argument, which may come
- * from the rule itself (direct skipto) or not (tablearg)
- *
- * The function never returns NULL: if the requested rule is not
- * present, it returns the next rule in the chain.
- * This also happens in case of a bogus argument > 65535
- */
-static struct ip_fw *
-lookup_next_rule(struct ip_fw *me, uint32_t rulenum)
-{
-	struct ip_fw *rule;
-
-	for (rule = me->next; rule ; rule = rule->next) {
-		if (rule->rulenum >= rulenum)
-			break;
-	}
-	if (rule == NULL)	/* failure or not a skipto */
-		rule = me->next ? me->next : me;
-	return rule;
-}
-
 /*
  * Support for uid/gid/jail lookup. These tests are expensive
  * (because we may need to look into the list of active sockets)
@@ -741,11 +716,13 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
  * Helper function to write the matching rule into args
  */
 static inline void
-set_match(struct ip_fw_args *args, struct ip_fw *f, struct ip_fw_chain *chain)
+set_match(struct ip_fw_args *args, int slot,
+	struct ip_fw_chain *chain)
 {
-	args->rule = f;
-	args->rule_id = f->id;
 	args->chain_id = chain->id;
+	args->slot = slot + 1; /* we use 0 as a marker */
+	args->rule_id = chain->map[slot]->id;
+	args->rulenum = chain->map[slot]->rulenum;
 }
 
 /*
@@ -839,7 +816,7 @@ ipfw_chk(struct ip_fw_args *args)
 	 */
 	struct ifnet *oif = args->oif;
 
-	struct ip_fw *f = NULL;		/* matching rule */
+	int f_pos = 0;		/* index of current rule in the array */
 	int retval = 0;
 
 	/*
@@ -1168,31 +1145,21 @@ do {								\
 		return (IP_FW_PASS);	/* accept */
 	}
 	mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
-	if (args->rule) {
+	if (args->slot) {
 		/*
 		 * Packet has already been tagged as a result of a previous
 		 * match on rule args->rule aka args->rule_id (PIPE, QUEUE,
 		 * REASS, NETGRAPH and similar, never a skipto).
-		 * Validate the pointer and continue from args->rule->next
-		 * if still present, otherwise use the default rule.
-		 * XXX If fw_one_pass != 0 then just accept it, though
-		 * the caller should never pass us such packets.
+		 * Validate the slot and continue from the next one
+		 * if still present, otherwise do a lookup.
 		 */
 		if (V_fw_one_pass) {
 			IPFW_RUNLOCK(chain);
 			return (IP_FW_PASS);
 		}
-		if (chain->id == args->chain_id) { /* pointer still valid */
-			f = args->rule->next;
-		} else { /* must revalidate the pointer */
-			for (f = chain->rules; f != NULL; f = f->next)
-				if (f == args->rule && f->id == args->rule_id) {
-					f = args->rule->next;
-					break;
-				}
-		}
-		if (f == NULL) /* in case of errors, use default; */
-			f = chain->default_rule;
+		f_pos = (args->chain_id == chain->id) ?
+		    args->slot /* already incremented */ :
+		    ipfw_find_rule(chain, args->rulenum, args->rule_id+1);
 	} else {
 		/*
 		 * Find the starting rule. It can be either the first
@@ -1200,18 +1167,13 @@ do {								\
 		 */
 		int skipto = mtag ? divert_cookie(mtag) : 0;
 
-		f = chain->rules;
+		f_pos = 0;
 		if (args->eh == NULL && skipto != 0) {
 			if (skipto >= IPFW_DEFAULT_RULE) {
 				IPFW_RUNLOCK(chain);
 				return (IP_FW_DENY); /* invalid */
 			}
-			while (f && f->rulenum <= skipto)
-				f = f->next;
-			if (f == NULL) {	/* drop packet */
-				IPFW_RUNLOCK(chain);
-				return (IP_FW_DENY);
-			}
+			f_pos = ipfw_find_rule(chain, skipto, 0);
 		}
 	}
 	/* reset divert rule to avoid confusion later */
@@ -1227,7 +1189,7 @@ do {								\
 	 * need to break out of one or both loops, or re-enter one of
 	 * the loops with updated variables. Loop variables are:
 	 *
-	 *	f (outer loop) points to the current rule.
+	 *	f_pos (outer loop) points to the current rule.
 	 *		On output it points to the matching rule.
 	 *	done (outer loop) is used as a flag to break the loop.
 	 *	l (inner loop)	residual length of current rule.
@@ -1236,15 +1198,16 @@ do {								\
 	 * We break the inner loop by setting l=0 and possibly
 	 * cmdlen=0 if we don't want to advance cmd.
 	 * We break the outer loop by setting done=1
-	 * We can restart the inner loop by setting l>0 and f, cmd
+	 * We can restart the inner loop by setting l>0 and f_pos, f, cmd
 	 * as needed.
 	 */
-	for (; f; f = f->next) {
+	for (; f_pos < chain->n_rules; f_pos++) {
 		ipfw_insn *cmd;
 		uint32_t tablearg = 0;
 		int l, cmdlen, skip_or; /* skip rest of OR block */
+		struct ip_fw *f;
 
-/* again: */
+		f = chain->map[f_pos];
 		if (V_set_disable & (1 << f->set) )
 			continue;
 
@@ -1864,8 +1827,8 @@ do {								\
 			 * Exceptions:
 			 * O_COUNT and O_SKIPTO actions:
 			 *   instead of terminating, we jump to the next rule
-			 *   (setting l=0), or to the SKIPTO target (by
-			 *   setting f, cmd and l as needed), respectively.
+			 *   (setting l=0), or to the SKIPTO target (setting
+			 *   f/f_len, cmd and l as needed), respectively.
 			 *
 			 * O_TAG, O_LOG and O_ALTQ action parameters:
 			 *   perform some action and set match = 1;
@@ -1926,7 +1889,14 @@ do {								\
 					 */
 					q->pcnt++;
 					q->bcnt += pktlen;
+					/* XXX we would like to have f_pos
+					 * readily accessible in the dynamic
+				         * rule, instead of having to
+					 * lookup q->rule.
+					 */
 					f = q->rule;
+					f_pos = ipfw_find_rule(chain,
+						f->rulenum, f->id);
 					cmd = ACTION_PTR(f);
 					l = f->cmd_len - f->act_ofs;
 					ipfw_dyn_unlock();
@@ -1952,9 +1922,11 @@ do {								\
 
 			case O_PIPE:
 			case O_QUEUE:
-				set_match(args, f, chain);
+				set_match(args, f_pos, chain);
 				args->cookie = (cmd->arg1 == IP_FW_TABLEARG) ?
 					tablearg : cmd->arg1;
+				if (cmd->opcode == O_QUEUE)
+					args->cookie |= 0x80000000;
 				retval = IP_FW_DUMMYNET;
 				l = 0;          /* exit inner loop */
 				done = 1;       /* exit outer loop */
@@ -1987,38 +1959,53 @@ do {								\
 				break;
 
 			case O_COUNT:
-			case O_SKIPTO:
 				f->pcnt++;	/* update stats */
 				f->bcnt += pktlen;
 				f->timestamp = time_uptime;
-				if (cmd->opcode == O_COUNT) {
 					l = 0;		/* exit inner loop */
 					break;
-				}
-				/* skipto: */
-				if (cmd->arg1 == IP_FW_TABLEARG) {
-				    f = lookup_next_rule(f, tablearg);
-				} else { /* direct skipto */
-				    /* update f->next_rule if not set */
-				    if (f->next_rule == NULL)
+
+			case O_SKIPTO:
+			    f->pcnt++;	/* update stats */
+			    f->bcnt += pktlen;
+			    f->timestamp = time_uptime;
+			    /* If possible use cached f_pos (in f->next_rule),
+			     * whose version is written in f->next_rule
+			     * (horrible hacks to avoid changing the ABI).
+			     */
+			    if (cmd->arg1 != IP_FW_TABLEARG &&
+				    (uint32_t)f->x_next == chain->id) {
+				f_pos = (uint32_t)f->next_rule;
+			    } else {
+				int i = (cmd->arg1 == IP_FW_TABLEARG) ?
+					tablearg : cmd->arg1;
+				/* make sure we do not jump backward */
+				if (i <= f->rulenum)
+				    i = f->rulenum + 1;
+				f_pos = ipfw_find_rule(chain, i, 0);
+				/* update the cache */
+				if (cmd->arg1 != IP_FW_TABLEARG) {
 					f->next_rule =
-					    lookup_next_rule(f, cmd->arg1);
-				    f = f->next_rule;
+					(void *)(uintptr_t)f_pos;
+				    f->x_next =
+					(void *)(uintptr_t)chain->id;
+				}
 				}
 				/*
-				 * Skip disabled rules, and
-				 * re-enter the inner loop
-				 * with the correct f, l and cmd.
+			     * Skip disabled rules, and re-enter
+			     * the inner loop with the correct
+			     * f_pos, f, l and cmd.
 				 * Also clear cmdlen and skip_or
 				 */
-				while (f && (V_set_disable & (1 << f->set)))
-					f = f->next;
-				if (f) { /* found a valid rule */
+			    for (; f_pos < chain->n_rules - 1 &&
+				    (V_set_disable &
+				     (1 << chain->map[f_pos]->set));
+				    f_pos++)
+				;
+			    /* prepare to enter the inner loop */
+			    f = chain->map[f_pos];
 					l = f->cmd_len;
 					cmd = f->cmd;
-				} else { /* should not happen */
-					l = 0;  /* exit inner loop */
-				}
 				match = 1;
 				cmdlen = 0;
 				skip_or = 0;
@@ -2083,7 +2070,7 @@ do {								\
 
 			case O_NETGRAPH:
 			case O_NGTEE:
-				set_match(args, f, chain);
+				set_match(args, f_pos, chain);
 				args->cookie = (cmd->arg1 == IP_FW_TABLEARG) ?
 					tablearg : cmd->arg1;
 				retval = (cmd->opcode == O_NETGRAPH) ?
@@ -2108,7 +2095,7 @@ do {								\
 				    struct cfg_nat *t;
 				    int nat_id;
 
-				    set_match(args, f, chain);
+				    set_match(args, f_pos, chain);
 				    t = ((ipfw_insn_nat *)cmd)->nat;
 				    if (t == NULL) {
 					nat_id = (cmd->arg1 == IP_FW_TABLEARG) ?
@@ -2175,7 +2162,7 @@ do {								\
 				    else
 					ip->ip_sum = in_cksum(m, hlen);
 				    retval = IP_FW_REASS;
-				    set_match(args, f, chain);
+				    set_match(args, f_pos, chain);
 				}
 				done = 1;	/* exit outer loop */
 				break;
@@ -2209,10 +2196,11 @@ do {								\
 	}		/* end of outer for, scan rules */
 
 	if (done) {
+		struct ip_fw *rule = chain->map[f_pos];
 		/* Update statistics */
-		f->pcnt++;
-		f->bcnt += pktlen;
-		f->timestamp = time_uptime;
+		rule->pcnt++;
+		rule->bcnt += pktlen;
+		rule->timestamp = time_uptime;
 	} else {
 		retval = IP_FW_DENY;
 		printf("ipfw: ouch!, skip past end of rules, denying packet\n");
@@ -2308,7 +2296,7 @@ static int
 vnet_ipfw_init(const void *unused)
 {
 	int error;
-	struct ip_fw default_rule;
+	struct ip_fw *rule = NULL;
 	struct ip_fw_chain *chain;
 
 	chain = &V_layer3_chain;
@@ -2322,38 +2310,39 @@ vnet_ipfw_init(const void *unused)
 #ifdef IPFIREWALL_VERBOSE_LIMIT
 	V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
 #endif
+#ifdef IPFIREWALL_NAT
+	LIST_INIT(&chain->nat);
+#endif
 
+	/* insert the default rule and create the initial map */
+	chain->n_rules = 1;
+	chain->static_len = sizeof(struct ip_fw);
+	chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_NOWAIT | M_ZERO);
+	if (chain->map)
+		rule = malloc(chain->static_len, M_IPFW, M_NOWAIT | M_ZERO);
+	if (rule == NULL) {
+		if (chain->map)
+			free(chain->map, M_IPFW);
+		printf("ipfw2: ENOSPC initializing default rule "
+			"(support disabled)\n");
+		return (ENOSPC);
+	}
 	error = ipfw_init_tables(chain);
 	if (error) {
 		panic("init_tables"); /* XXX Marko fix this ! */
 	}
-#ifdef IPFIREWALL_NAT
-	LIST_INIT(&chain->nat);
-#endif
 
+	/* fill and insert the default rule */
+	rule->act_ofs = 0;
+	rule->rulenum = IPFW_DEFAULT_RULE;
+	rule->cmd_len = 1;
+	rule->set = RESVD_SET;
+	rule->cmd[0].len = 1;
+	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
+	chain->rules = chain->default_rule = chain->map[0] = rule;
+	chain->id = rule->id = 1;
 
-	chain->rules = NULL;
 	IPFW_LOCK_INIT(chain);
-
-	bzero(&default_rule, sizeof default_rule);
-	default_rule.act_ofs = 0;
-	default_rule.rulenum = IPFW_DEFAULT_RULE;
-	default_rule.cmd_len = 1;
-	default_rule.set = RESVD_SET;
-	default_rule.cmd[0].len = 1;
-	default_rule.cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
-	error = ipfw_add_rule(chain, &default_rule);
-
-	if (error != 0) {
-		printf("ipfw2: error %u initializing default rule "
-			"(support disabled)\n", error);
-		IPFW_LOCK_DESTROY(chain);
-		printf("leaving ipfw_iattach (1) with error %d\n", error);
-		return (error);
-	}
-
-	chain->default_rule = chain->rules;
-
 	ipfw_dyn_init();
 
 	/* First set up some values that are compile time options */
@@ -2385,8 +2374,9 @@ vnet_ipfw_init(const void *unused)
 static int
 vnet_ipfw_uninit(const void *unused)
 {
-	struct ip_fw *reap;
+	struct ip_fw *reap, *rule;
 	struct ip_fw_chain *chain = &V_layer3_chain;
+	int i;
 
 	V_ipfw_vnet_ready = 0; /* tell new callers to go away */
 	/*
@@ -2400,19 +2390,26 @@ vnet_ipfw_uninit(const void *unused)
 #endif
 	V_ip_fw_chk_ptr = NULL;
 	V_ip_fw_ctl_ptr = NULL;
+	IPFW_UH_WLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+	IPFW_UH_WLOCK(chain);
 
 	IPFW_WLOCK(chain);
-	/* We wait on the wlock here until the last user leaves */
 	IPFW_WUNLOCK(chain);
 	IPFW_WLOCK(chain);
 
 	ipfw_dyn_uninit(0);	/* run the callout_drain */
 	ipfw_flush_tables(chain);
-	chain->reap = NULL;
-	ipfw_free_chain(chain, 1 /* kill default rule */);
-	reap = chain->reap;
-	chain->reap = NULL;
+	reap = NULL;
+	for (i = 0; i < chain->n_rules; i++) {
+		rule = chain->map[i];
+		rule->x_next = reap;
+		reap = rule;
+	}
+	if (chain->map)
+		free(chain->map, M_IPFW);
 	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
 	if (reap != NULL)
 		ipfw_reap_rules(reap);
 	IPFW_LOCK_DESTROY(chain);
diff --git a/sys/netinet/ipfw/ip_fw_log.c b/sys/netinet/ipfw/ip_fw_log.c
index cd404d4..e3515af 100644
--- a/sys/netinet/ipfw/ip_fw_log.c
+++ b/sys/netinet/ipfw/ip_fw_log.c
@@ -26,9 +26,6 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#define        DEB(x)
-#define        DDB(x) x
-
 /*
  * Logging support for ipfw
  */
diff --git a/sys/netinet/ipfw/ip_fw_pfil.c b/sys/netinet/ipfw/ip_fw_pfil.c
index e1dcf16..35ee157 100644
--- a/sys/netinet/ipfw/ip_fw_pfil.c
+++ b/sys/netinet/ipfw/ip_fw_pfil.c
@@ -125,7 +125,8 @@ ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
 	if (ng_tag != NULL) {
 		KASSERT(ng_tag->dir == NG_IPFW_IN,
 		    ("ng_ipfw tag with wrong direction"));
-		args.rule = ng_tag->rule;
+		args.slot = ng_tag->slot;
+		args.rulenum = ng_tag->rulenum;
 		args.rule_id = ng_tag->rule_id;
 		args.chain_id = ng_tag->chain_id;
 		m_tag_delete(*m0, (struct m_tag *)ng_tag);
@@ -137,10 +138,10 @@ again:
 		struct dn_pkt_tag *dt;
 
 		dt = (struct dn_pkt_tag *)(dn_tag+1);
-		args.rule = dt->rule;
+		args.slot = dt->slot;
+		args.rulenum = dt->rulenum;
 		args.rule_id = dt->rule_id;
 		args.chain_id = dt->chain_id;
-
 		m_tag_delete(*m0, dn_tag);
 	}
 
@@ -148,7 +149,7 @@ again:
 	args.inp = inp;
 	tee = 0;
 
-	if (V_fw_one_pass == 0 || args.rule == NULL) {
+	if (V_fw_one_pass == 0 || args.slot == 0) {
 		ipfw = ipfw_chk(&args);
 		*m0 = args.m;
 	} else
@@ -201,7 +202,7 @@ again:
 			*m0 = NULL;
 			return 0;	/* packet consumed */
 		} else {
-			args.rule = NULL;
+			args.slot = 0;
 			goto again;	/* continue with packet */
 		}
 
@@ -258,7 +259,8 @@ ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
 	if (ng_tag != NULL) {
 		KASSERT(ng_tag->dir == NG_IPFW_OUT,
 		    ("ng_ipfw tag with wrong direction"));
-		args.rule = ng_tag->rule;
+		args.slot = ng_tag->slot;
+		args.rulenum = ng_tag->rulenum;
 		args.rule_id = ng_tag->rule_id;
 		args.chain_id = ng_tag->chain_id;
 		m_tag_delete(*m0, (struct m_tag *)ng_tag);
@@ -270,10 +272,10 @@ again:
 		struct dn_pkt_tag *dt;
 
 		dt = (struct dn_pkt_tag *)(dn_tag+1);
-		args.rule = dt->rule;
+		args.slot = dt->slot;
+		args.rulenum = dt->rulenum;
 		args.rule_id = dt->rule_id;
 		args.chain_id = dt->chain_id;
-
 		m_tag_delete(*m0, dn_tag);
 	}
 
@@ -282,7 +284,7 @@ again:
 	args.inp = inp;
 	tee = 0;
 
-	if (V_fw_one_pass == 0 || args.rule == NULL) {
+	if (V_fw_one_pass == 0 || args.slot == 0) {
 		ipfw = ipfw_chk(&args);
 		*m0 = args.m;
 	} else
@@ -341,7 +343,7 @@ again:
 			*m0 = NULL;
 			return 0;	/* packet consumed */
 		} else {
-			args.rule = NULL;
+			args.slot = 0;
 			goto again;	/* continue with packet */
 		}
 
diff --git a/sys/netinet/ipfw/ip_fw_private.h b/sys/netinet/ipfw/ip_fw_private.h
index 9c67d20..da5db8e 100644
--- a/sys/netinet/ipfw/ip_fw_private.h
+++ b/sys/netinet/ipfw/ip_fw_private.h
@@ -79,7 +79,12 @@ struct ip_fw_args {
 	struct ifnet	*oif;		/* output interface		*/
 	struct sockaddr_in *next_hop;	/* forward address		*/
 
-	struct ip_fw	*rule;		/* matching rule		*/
+	/* chain_id validates 'slot', the location of the pointer to
+	 * a matching rule.
+	 * If invalid, we can lookup the rule using rule_id and rulenum
+	 */
+	uint32_t	slot;		/* slot for matching rule	*/
+	uint32_t	rulenum;	/* matching rule number		*/
 	uint32_t	rule_id;	/* matching rule id		*/
 	uint32_t	chain_id;	/* ruleset id			*/
 
@@ -178,9 +183,11 @@ struct ip_fw_chain {
 	struct ip_fw	*default_rule;
 	int		n_rules;	/* number of static rules */
 	int		static_len;	/* total len of static rules */
+	struct ip_fw    **map;	/* array of rule ptrs to ease lookup */
 	LIST_HEAD(nat_list, cfg_nat) nat;       /* list of nat entries */
 	struct radix_node_head *tables[IPFW_TABLES_MAX];
 	struct rwlock	rwmtx;
+	struct rwlock	uh_lock;	/* lock for upper half */
 	uint32_t	id;		/* ruleset id */
 };
 
@@ -191,9 +198,16 @@ struct sockopt;	/* used by tcp_var.h */
  * so the variable and the macros must be here.
  */
 
-#define	IPFW_LOCK_INIT(_chain) \
-	rw_init(&(_chain)->rwmtx, "IPFW static rules")
-#define	IPFW_LOCK_DESTROY(_chain)	rw_destroy(&(_chain)->rwmtx)
+#define	IPFW_LOCK_INIT(_chain) do {			\
+	rw_init(&(_chain)->rwmtx, "IPFW static rules");	\
+	rw_init(&(_chain)->uh_lock, "IPFW UH lock");	\
+	} while (0)
+
+#define	IPFW_LOCK_DESTROY(_chain) do {			\
+	rw_destroy(&(_chain)->rwmtx);			\
+	rw_destroy(&(_chain)->uh_lock);			\
+	} while (0)
+
 #define	IPFW_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
 #define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
@@ -201,12 +215,17 @@ struct sockopt;	/* used by tcp_var.h */
 #define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
 #define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
 
+#define IPFW_UH_RLOCK(p) rw_rlock(&(p)->uh_lock)
+#define IPFW_UH_RUNLOCK(p) rw_runlock(&(p)->uh_lock)
+#define IPFW_UH_WLOCK(p) rw_wlock(&(p)->uh_lock)
+#define IPFW_UH_WUNLOCK(p) rw_wunlock(&(p)->uh_lock)
+
 /* In ip_fw_sockopt.c */
+int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
 int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
 int ipfw_ctl(struct sockopt *sopt);
 int ipfw_chk(struct ip_fw_args *args);
 void ipfw_reap_rules(struct ip_fw *head);
-void ipfw_free_chain(struct ip_fw_chain *chain, int kill_default);
 
 /* In ip_fw_table.c */
 struct radix_node;
diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c
index 49618da..061c3f9 100644
--- a/sys/netinet/ipfw/ip_fw_sockopt.c
+++ b/sys/netinet/ipfw/ip_fw_sockopt.c
@@ -1,5 +1,7 @@
 /*-
- * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Supported by: Valeria Paoli
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -26,9 +28,6 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#define        DEB(x)
-#define        DDB(x) x
-
 /*
  * Sockopt support for ipfw. The routines here implement
  * the upper half of the ipfw code.
@@ -81,136 +80,143 @@ MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
  */
 
 /*
- * When a rule is added/deleted, clear the next_rule pointers in all rules.
- * These will be reconstructed on the fly as packets are matched.
+ * Find the smallest rule >= key, id.
+ * We could use bsearch but it is so simple that we code it directly
  */
-static void
-flush_rule_ptrs(struct ip_fw_chain *chain)
+int
+ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
 {
-	struct ip_fw *rule;
+	int i, lo, hi;
+	struct ip_fw *r;
+
+  	for (lo = 0, hi = chain->n_rules - 1; lo < hi;) {
+		i = (lo + hi) / 2;
+		r = chain->map[i];
+		if (r->rulenum < key)
+			lo = i + 1;	/* continue from the next one */
+		else if (r->rulenum > key)
+			hi = i;		/* this might be good */
+		else if (r->id < id)
+			lo = i + 1;	/* continue from the next one */
+		else /* r->id >= id */
+			hi = i;		/* this might be good */
+	};
+	return hi;
+}
 
-	IPFW_WLOCK_ASSERT(chain);
+/*
+ * allocate a new map, returns the chain locked. extra is the number
+ * of entries to add or delete.
+ */
+static struct ip_fw **
+get_map(struct ip_fw_chain *chain, int extra, int locked)
+{
 
-	chain->id++;
+	for (;;) {
+		struct ip_fw **map;
+		int i;
 
-	for (rule = chain->rules; rule; rule = rule->next)
-		rule->next_rule = NULL;
+		i = chain->n_rules + extra;
+		map = malloc(i * sizeof(struct ip_fw *), M_IPFW, M_WAITOK);
+		if (map == NULL) {
+			printf("%s: cannot allocate map\n", __FUNCTION__);
+			return NULL;
+		}
+		if (!locked)
+			IPFW_UH_WLOCK(chain);
+		if (i >= chain->n_rules + extra) /* good */
+			return map;
+		/* otherwise we lost the race, free and retry */
+		if (!locked)
+			IPFW_UH_WUNLOCK(chain);
+		free(map, M_IPFW);
+	}
+}
+
+/*
+ * swap the maps. It is supposed to be called with IPFW_UH_WLOCK
+ */
+static struct ip_fw **
+swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
+{
+	struct ip_fw **old_map;
+
+	IPFW_WLOCK(chain);
+	chain->id++;
+	chain->n_rules = new_len;
+	old_map = chain->map;
+	chain->map = new_map;
+	IPFW_WUNLOCK(chain);
+	return old_map;
 }
 
 /*
  * Add a new rule to the list. Copy the rule into a malloc'ed area, then
  * possibly create a rule number and add the rule to the list.
  * Update the rule_number in the input struct so the caller knows it as well.
+ * XXX DO NOT USE FOR THE DEFAULT RULE.
+ * Must be called without IPFW_UH held
  */
 int
 ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
 {
-	struct ip_fw *rule, *f, *prev;
-	int l = RULESIZE(input_rule);
+	struct ip_fw *rule;
+	int i, l, insert_before;
+	struct ip_fw **map;	/* the new array of pointers */
 
-	if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE)
+	if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
 		return (EINVAL);
 
-	rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO);
+	l = RULESIZE(input_rule);
+	rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
 	if (rule == NULL)
 		return (ENOSPC);
+	/* get_map returns with IPFW_UH_WLOCK if successful */
+	map = get_map(chain, 1, 0 /* not locked */);
+	if (map == NULL) {
+		free(rule, M_IPFW);
+		return ENOSPC;
+	}
 
 	bcopy(input_rule, rule, l);
-
-	rule->next = NULL;
+	/* clear fields not settable from userland */
+	rule->x_next = NULL;
 	rule->next_rule = NULL;
-
 	rule->pcnt = 0;
 	rule->bcnt = 0;
 	rule->timestamp = 0;
 
-	IPFW_WLOCK(chain);
-
-	if (chain->rules == NULL) {	/* default rule */
-		chain->rules = rule;
-		rule->id = ++chain->id;
-		goto done;
-        }
-
-	/*
-	 * If rulenum is 0, find highest numbered rule before the
-	 * default rule, and add autoinc_step
-	 */
 	if (V_autoinc_step < 1)
 		V_autoinc_step = 1;
 	else if (V_autoinc_step > 1000)
 		V_autoinc_step = 1000;
+	/* find the insertion point, we will insert before */
+	insert_before = rule->rulenum ? rule->rulenum + 1 : IPFW_DEFAULT_RULE;
+	i = ipfw_find_rule(chain, insert_before, 0);
+	/* duplicate first part */
+	if (i > 0)
+		bcopy(chain->map, map, i * sizeof(struct ip_fw *));
+	map[i] = rule;
+	/* duplicate remaining part, we always have the default rule */
+	bcopy(chain->map + i, map + i + 1,
+		sizeof(struct ip_fw *) *(chain->n_rules - i));
 	if (rule->rulenum == 0) {
-		/*
-		 * locate the highest numbered rule before default
-		 */
-		for (f = chain->rules; f; f = f->next) {
-			if (f->rulenum == IPFW_DEFAULT_RULE)
-				break;
-			rule->rulenum = f->rulenum;
-		}
+		/* write back the number */
+		rule->rulenum = i > 0 ? map[i-1]->rulenum : 0;
 		if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
 			rule->rulenum += V_autoinc_step;
 		input_rule->rulenum = rule->rulenum;
 	}
 
-	/*
-	 * Now insert the new rule in the right place in the sorted list.
-	 */
-	for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) {
-		if (f->rulenum > rule->rulenum) { /* found the location */
-			if (prev) {
-				rule->next = f;
-				prev->next = rule;
-			} else { /* head insert */
-				rule->next = chain->rules;
-				chain->rules = rule;
-			}
-			break;
-		}
-	}
-	flush_rule_ptrs(chain);
-	/* chain->id incremented inside flush_rule_ptrs() */
-	rule->id = chain->id;
-done:
-	chain->n_rules++;
+	rule->id = chain->id + 1;
+	map = swap_map(chain, map, chain->n_rules + 1);
 	chain->static_len += l;
-	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+	if (map)
+		free(map, M_IPFW);
 	return (0);
 }
 
-/**
- * Remove a static rule (including derived * dynamic rules)
- * and place it on the ``reap list'' for later reclamation.
- * The caller is in charge of clearing rule pointers to avoid
- * dangling pointers.
- * @return a pointer to the next entry.
- * Arguments are not checked, so they better be correct.
- */
-static struct ip_fw *
-remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
-    struct ip_fw *prev)
-{
-	struct ip_fw *n;
-	int l = RULESIZE(rule);
-
-	IPFW_WLOCK_ASSERT(chain);
-
-	n = rule->next;
-	ipfw_remove_dyn_children(rule);
-	if (prev == NULL)
-		chain->rules = n;
-	else
-		prev->next = n;
-	chain->n_rules--;
-	chain->static_len -= l;
-
-	rule->next = chain->reap;
-	chain->reap = rule;
-
-	return n;
-}
-
 /*
  * Reclaim storage associated with a list of rules.  This is
  * typically the list created using remove_rule.
@@ -222,34 +228,11 @@ ipfw_reap_rules(struct ip_fw *head)
 	struct ip_fw *rule;
 
 	while ((rule = head) != NULL) {
-		head = head->next;
+		head = head->x_next;
 		free(rule, M_IPFW);
 	}
 }
 
-/*
- * Remove all rules from a chain (except rules in set RESVD_SET
- * unless kill_default = 1).  The caller is responsible for
- * reclaiming storage for the rules left in chain->reap.
- */
-void
-ipfw_free_chain(struct ip_fw_chain *chain, int kill_default)
-{
-	struct ip_fw *prev, *rule;
-
-	IPFW_WLOCK_ASSERT(chain);
-
-	chain->reap = NULL;
-	flush_rule_ptrs(chain); /* more efficient to do outside the loop */
-	for (prev = NULL, rule = chain->rules; rule ; )
-		if (kill_default || rule->set != RESVD_SET)
-			rule = remove_rule(chain, rule, prev);
-		else {
-			prev = rule;
-			rule = rule->next;
-		}
-}
-
 /**
  * Remove all rules with given number, and also do set manipulation.
  * Assumes chain != NULL && *chain != NULL.
@@ -267,9 +250,12 @@ ipfw_free_chain(struct ip_fw_chain *chain, int kill_default)
 static int
 del_entry(struct ip_fw_chain *chain, u_int32_t arg)
 {
-	struct ip_fw *prev = NULL, *rule;
-	u_int16_t rulenum;	/* rule or old_set */
-	u_int8_t cmd, new_set;
+	struct ip_fw *rule;
+	uint32_t rulenum;	/* rule or old_set */
+	uint8_t cmd, new_set;
+	int start, end = 0, i, ofs, n;
+	struct ip_fw **map = NULL;
+	int error = 0;
 
 	rulenum = arg & 0xffff;
 	cmd = (arg >> 24) & 0xff;
@@ -285,96 +271,122 @@ del_entry(struct ip_fw_chain *chain, u_int32_t arg)
 			return EINVAL;
 	}
 
-	IPFW_WLOCK(chain);
-	rule = chain->rules;	/* common starting point */
+	IPFW_UH_WLOCK(chain); /* prevent conflicts among the writers */
 	chain->reap = NULL;	/* prepare for deletions */
+
 	switch (cmd) {
-	case 0:	/* delete rules with given number */
-		/*
-		 * locate first rule to delete
+	case 0:	/* delete rules with given number (0 is special means all) */
+	case 1:	/* delete all rules with given set number, rule->set == rulenum */
+	case 5: /* delete rules with given number and with given set number.
+		 * rulenum - given rule number;
+		 * new_set - given set number.
 		 */
-		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
-			;
-		if (rule->rulenum != rulenum) {
-			IPFW_WUNLOCK(chain);
-			return EINVAL;
-		}
-
-		/*
-		 * flush pointers outside the loop, then delete all matching
-		 * rules. prev remains the same throughout the cycle.
+		/* locate first rule to delete (start), the one after the
+		 * last one (end), and count how many rules to delete (n)
 		 */
-		flush_rule_ptrs(chain);
-		while (rule->rulenum == rulenum)
-			rule = remove_rule(chain, rule, prev);
+		n = 0;
+		if (cmd == 1) { /* look for a specific set, must scan all */
+			for (start = -1, i = 0; i < chain->n_rules; i++) {
+				if (chain->map[start]->set != rulenum)
+					continue;
+				if (start < 0)
+					start = i;
+				end = i;
+				n++;
+			}
+			end++;	/* first non-matching */
+		} else {
+			start = ipfw_find_rule(chain, rulenum, 0);
+			for (end = start; end < chain->n_rules; end++) {
+				rule = chain->map[end];
+				if (rulenum > 0 && rule->rulenum != rulenum)
+					break;
+				if (rule->set != RESVD_SET &&
+				    (cmd == 0 || rule->set == new_set) )
+					n++;
+			}
+		}
+		/* allocate the map, if needed */
+		if (n > 0)
+			map = get_map(chain, -n, 1 /* locked */);
+		if (n == 0 || map == NULL) {
+			error = EINVAL;
 		break;
-
-	case 1:	/* delete all rules with given set number */
-		flush_rule_ptrs(chain);
-		while (rule->rulenum < IPFW_DEFAULT_RULE) {
-			if (rule->set == rulenum)
-				rule = remove_rule(chain, rule, prev);
-			else {
-				prev = rule;
-				rule = rule->next;
+		}
+		/* copy the initial part of the map */
+		if (start > 0)
+			bcopy(chain->map, map, start * sizeof(struct ip_fw *));
+		/* copy active rules between start and end */
+		for (i = ofs = start; i < end; i++) {
+			rule = chain->map[i];
+			if (!(rule->set != RESVD_SET &&
+			    (cmd == 0 || rule->set == new_set) ))
+				map[ofs++] = chain->map[i];
+		}
+		/* finally the tail */
+		bcopy(chain->map + end, map + ofs,
+			(chain->n_rules - end) * sizeof(struct ip_fw *));
+		map = swap_map(chain, map, chain->n_rules - n);
+		/* now remove the rules deleted */
+		for (i = start; i < end; i++) {
+			rule = map[i];
+			if (rule->set != RESVD_SET &&
+			    (cmd == 0 || rule->set == new_set) ) {
+				int l = RULESIZE(rule);
+
+				chain->static_len -= l;
+				ipfw_remove_dyn_children(rule);
+				rule->x_next = chain->reap;
+				chain->reap = rule;
 			}
 		}
 		break;
 
 	case 2:	/* move rules with given number to new set */
-		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
+		IPFW_UH_WLOCK(chain);
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
 			if (rule->rulenum == rulenum)
 				rule->set = new_set;
+		}
+		IPFW_UH_WUNLOCK(chain);
 		break;
 
 	case 3: /* move rules with given set number to new set */
-		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
+		IPFW_UH_WLOCK(chain);
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
 			if (rule->set == rulenum)
 				rule->set = new_set;
+		}
+		IPFW_UH_WUNLOCK(chain);
 		break;
 
 	case 4: /* swap two sets */
-		for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next)
+		IPFW_UH_WLOCK(chain);
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
 			if (rule->set == rulenum)
 				rule->set = new_set;
 			else if (rule->set == new_set)
 				rule->set = rulenum;
-		break;
-
-	case 5: /* delete rules with given number and with given set number.
-		 * rulenum - given rule number;
-		 * new_set - given set number.
-		 */
-		for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
-			;
-		if (rule->rulenum != rulenum) {
-			IPFW_WUNLOCK(chain);
-			return (EINVAL);
-		}
-		flush_rule_ptrs(chain);
-		while (rule->rulenum == rulenum) {
-			if (rule->set == new_set)
-				rule = remove_rule(chain, rule, prev);
-			else {
-				prev = rule;
-				rule = rule->next;
-			}
 		}
+		IPFW_UH_WUNLOCK(chain);
+		break;
 	}
-	/*
-	 * Look for rules to reclaim.  We grab the list before
-	 * releasing the lock then reclaim them w/o the lock to
-	 * avoid a LOR with dummynet.
-	 */
 	rule = chain->reap;
-	IPFW_WUNLOCK(chain);
+	chain->reap = NULL;
+	IPFW_UH_WUNLOCK(chain);
 	ipfw_reap_rules(rule);
-	return 0;
+	if (map)
+		free(map, M_IPFW);
+	return error;
 }
 
 /*
  * Clear counters for a specific rule.
- * The enclosing "table" is assumed locked.
+ * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
+ * so we only care that rules do not disappear.
  */
 static void
 clear_counters(struct ip_fw *rule, int log_only)
@@ -403,6 +415,7 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
 {
 	struct ip_fw *rule;
 	char *msg;
+	int i;
 
 	uint16_t rulenum = arg & 0xffff;
 	uint8_t set = (arg >> 16) & 0xff;
@@ -413,11 +426,12 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
 	if (cmd == 1 && set > RESVD_SET)
 		return (EINVAL);
 
-	IPFW_WLOCK(chain);
+	IPFW_UH_RLOCK(chain);
 	if (rulenum == 0) {
 		V_norule_counter = 0;
-		for (rule = chain->rules; rule; rule = rule->next) {
-			/* Skip rules from another set. */
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
+			/* Skip rules not in our set. */
 			if (cmd == 1 && rule->set != set)
 				continue;
 			clear_counters(rule, log_only);
@@ -426,18 +440,14 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
 		    "Accounting cleared";
 	} else {
 		int cleared = 0;
-		/*
-		 * We can have multiple rules with the same number, so we
-		 * need to clear them all.
-		 */
-		for (rule = chain->rules; rule; rule = rule->next)
+		for (i = 0; i < chain->n_rules; i++) {
+			rule = chain->map[i];
 			if (rule->rulenum == rulenum) {
-				while (rule && rule->rulenum == rulenum) {
 					if (cmd == 0 || rule->set == set)
 						clear_counters(rule, log_only);
-					rule = rule->next;
-				}
 				cleared = 1;
+			}
+			if (rule->rulenum > rulenum)
 				break;
 			}
 		if (!cleared) {	/* we did not find any matching rules */
@@ -446,7 +456,7 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
 		}
 		msg = log_only ? "logging count reset" : "cleared";
 	}
-	IPFW_WUNLOCK(chain);
+	IPFW_UH_RUNLOCK(chain);
 
 	if (V_fw_verbose) {
 		int lev = LOG_SECURITY | LOG_NOTICE;
@@ -497,7 +507,6 @@ check_ipfw_struct(struct ip_fw *rule, int size)
 			    cmd->opcode);
 			return EINVAL;
 		}
-		DEB(printf("ipfw: opcode %d\n", cmd->opcode);)
 		switch (cmd->opcode) {
 		case O_PROBE_STATE:
 		case O_KEEP_STATE:
@@ -771,43 +780,37 @@ bad_size:
 /*
  * Copy the static and dynamic rules to the supplied buffer
  * and return the amount of space actually used.
+ * Must be run under IPFW_UH_RLOCK
  */
 static size_t
 ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
 {
 	char *bp = buf;
 	char *ep = bp + space;
-	struct ip_fw *rule;
-	int i;
+	struct ip_fw *rule, *dst;
+	int l, i;
 	time_t	boot_seconds;
 
         boot_seconds = boottime.tv_sec;
-	/* XXX this can take a long time and locking will block packet flow */
-	IPFW_RLOCK(chain);
-	for (rule = chain->rules; rule ; rule = rule->next) {
-		/*
-		 * Verify the entry fits in the buffer in case the
-		 * rules changed between calculating buffer space and
-		 * now.  This would be better done using a generation
-		 * number but should suffice for now.
-		 */
-		i = RULESIZE(rule);
-		if (bp + i <= ep) {
-			bcopy(rule, bp, i);
+	for (i = 0; i < chain->n_rules; i++) {
+		rule = chain->map[i];
+		l = RULESIZE(rule);
+		if (bp + l > ep) { /* should not happen */
+			printf("overflow dumping static rules\n");
+			break;
+		}
+		dst = (struct ip_fw *)bp;
+		bcopy(rule, dst, l);
 			/*
 			 * XXX HACK. Store the disable mask in the "next"
 			 * pointer in a wild attempt to keep the ABI the same.
 			 * Why do we do this on EVERY rule?
 			 */
-			bcopy(&V_set_disable,
-			    &(((struct ip_fw *)bp)->next_rule),
-			    sizeof(V_set_disable));
-			if (((struct ip_fw *)bp)->timestamp)
-				((struct ip_fw *)bp)->timestamp += boot_seconds;
-			bp += i;
+		bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
+		if (dst->timestamp)
+			dst->timestamp += boot_seconds;
+		bp += l;
 		}
-	}
-	IPFW_RUNLOCK(chain);
 	ipfw_get_dynamic(&bp, ep); /* protected by the dynamic lock */
 	return (bp - (char *)buf);
 }
@@ -857,41 +860,33 @@ ipfw_ctl(struct sockopt *sopt)
 		 * change between calculating the size and returning the
 		 * data in which case we'll just return what fits.
 		 */
-		size = chain->static_len;	/* size of static rules */
-		size += ipfw_dyn_len();
+		for (;;) {
+			int len = 0, want;
 
+			size = chain->static_len;
+			size += ipfw_dyn_len();
 		if (size >= sopt->sopt_valsize)
 			break;
-		/*
-		 * XXX todo: if the user passes a short length just to know
-		 * how much room is needed, do not bother filling up the
-		 * buffer, just jump to the sooptcopyout.
-		 */
 		buf = malloc(size, M_TEMP, M_WAITOK);
-		error = sooptcopyout(sopt, buf,
-				ipfw_getrules(chain, buf, size));
+			if (buf == NULL)
+				break;
+			IPFW_UH_RLOCK(chain);
+			/* check again how much space we need */
+			want = chain->static_len + ipfw_dyn_len();
+			if (size >= want)
+				len = ipfw_getrules(chain, buf, size);
+			IPFW_UH_RUNLOCK(chain);
+			if (size >= want)
+				error = sooptcopyout(sopt, buf, len);
 		free(buf, M_TEMP);
+			if (size >= want)
+				break;
+		}
 		break;
 
 	case IP_FW_FLUSH:
-		/*
-		 * Normally we cannot release the lock on each iteration.
-		 * We could do it here only because we start from the head all
-		 * the times so there is no risk of missing some entries.
-		 * On the other hand, the risk is that we end up with
-		 * a very inconsistent ruleset, so better keep the lock
-		 * around the whole cycle.
-		 *
-		 * XXX this code can be improved by resetting the head of
-		 * the list to point to the default rule, and then freeing
-		 * the old list without the need for a lock.
-		 */
-
-		IPFW_WLOCK(chain);
-		ipfw_free_chain(chain, 0 /* keep default rule */);
-		rule = chain->reap;
-		IPFW_WUNLOCK(chain);
-		ipfw_reap_rules(rule);
+		/* locking is done within del_entry() */
+		error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */
 		break;
 
 	case IP_FW_ADD:
@@ -901,6 +896,7 @@ ipfw_ctl(struct sockopt *sopt)
 		if (error == 0)
 			error = check_ipfw_struct(rule, sopt->sopt_valsize);
 		if (error == 0) {
+			/* locking is done within ipfw_add_rule() */
 			error = ipfw_add_rule(chain, rule);
 			size = RULESIZE(rule);
 			if (!error && sopt->sopt_dir == SOPT_GET)
@@ -931,9 +927,11 @@ ipfw_ctl(struct sockopt *sopt)
 			/* delete or reassign, locking done in del_entry() */
 			error = del_entry(chain, rulenum[0]);
 		} else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */
+			IPFW_UH_WLOCK(chain);
 			V_set_disable =
 			    (V_set_disable | rulenum[0]) & ~rulenum[1] &
 			    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
+			IPFW_UH_WUNLOCK(chain);
 		} else
 			error = EINVAL;
 		break;