author    glebius <glebius@FreeBSD.org>  2012-09-22 10:14:47 +0000
committer glebius <glebius@FreeBSD.org>  2012-09-22 10:14:47 +0000
commit    ab6639fea0be0b5523a77127e0f9f567d5ede818 (patch)
tree      42704265a07b2efb4d603f46e192696df357fe86 /sys/netpfil
parent    9c41e27b31517f0c2ebdbd49332884444f2b3592 (diff)
When the connection rate limit is hit and we overload a source address into a
table, we are actually editing the table, which means editing rules, so we
need writer access to them. Fix this by offloading the table update to the
same taskqueue we already use for flushing.

Since the taskqueue's major task is now overloading, and flushing is optional,
do a mechanical rename s/flush/overload/ in the code related to the taskqueue.

Since the overloading tasks do unsafe referencing of rules, provide a bandaid
in pf_purge_unlinked_rules(): if the latter sees any queued tasks, it skips
purging for this run.

In the table code:
- Assert any lock in pfr_lookup_addr().
- Assert the writer lock in pfr_route_kentry().
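The heart of the change is a defer-to-taskqueue pattern: the packet path only
queues a small copy of the offending address under a dedicated mutex and kicks
a task, and the task later drains the queue and edits the overload table under
the rules writer lock. The following is a minimal sketch of that pattern built
on the stock queue(3), mutex(9) and taskqueue(9) APIs; the foo_* names and the
M_TEMP malloc type are illustrative placeholders rather than identifiers from
this commit, and foo_mtx/foo_task are assumed to be initialized elsewhere with
mtx_init()/TASK_INIT(), the way pf_initialize() does for the real queue.

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/taskqueue.h>

struct foo_entry {
	SLIST_ENTRY(foo_entry)	next;
	/* A private copy of whatever the task needs later goes here. */
};
SLIST_HEAD(foo_head, foo_entry);

static struct foo_head	foo_queue = SLIST_HEAD_INITIALIZER(foo_queue);
static struct mtx	foo_mtx;	/* mtx_init()'ed at attach time */
static struct task	foo_task;	/* TASK_INIT()'ed at attach time */

/* Hot path: may not sleep and may not take the rules writer lock. */
static int
foo_defer(void)
{
	struct foo_entry *e;

	e = malloc(sizeof(*e), M_TEMP, M_NOWAIT);
	if (e == NULL)
		return (ENOMEM);	/* too bad :( */
	mtx_lock(&foo_mtx);
	SLIST_INSERT_HEAD(&foo_queue, e, next);
	mtx_unlock(&foo_mtx);
	taskqueue_enqueue(taskqueue_swi, &foo_task);
	return (0);
}

/* Task context: here it is safe to sleep and to take the writer lock. */
static void
foo_task_fn(void *arg __unused, int pending __unused)
{
	struct foo_head queue;
	struct foo_entry *e, *e1;

	/* Steal the whole list in one go, then work on the private copy. */
	mtx_lock(&foo_mtx);
	queue = foo_queue;
	SLIST_INIT(&foo_queue);
	mtx_unlock(&foo_mtx);

	SLIST_FOREACH_SAFE(e, &queue, next, e1) {
		/* ... take the writer lock and edit the table here ... */
		free(e, M_TEMP);
	}
}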
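The two assertions added in pf_table.c document the locking contract for the
table radix tree: pfr_lookup_addr() only reads the tree, so holding the rules
lock in any mode is enough, while pfr_route_kentry() modifies it and therefore
needs the writer lock. Below is a sketch of what such assert macros typically
expand to, under the assumption (not shown in this diff) that the pf rules
lock of this vintage is an rw_lock(9) named pf_rules_lock; rw_assert() is only
compiled in on kernels built with INVARIANTS.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

extern struct rwlock pf_rules_lock;

#define	PF_RULES_ASSERT()	rw_assert(&pf_rules_lock, RA_LOCKED)
#define	PF_RULES_WASSERT()	rw_assert(&pf_rules_lock, RA_WLOCKED)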
Diffstat (limited to 'sys/netpfil')
-rw-r--r--  sys/netpfil/pf/pf.c       | 186
-rw-r--r--  sys/netpfil/pf/pf_table.c |   4
2 files changed, 109 insertions(+), 81 deletions(-)
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index b974881..1f17ab5 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -163,25 +163,25 @@ static struct mtx pf_sendqueue_mtx;
#define PF_SENDQ_UNLOCK() mtx_unlock(&pf_sendqueue_mtx)
/*
- * Queue for pf_flush_task() tasks.
+ * Queue for pf_overload_task() tasks.
*/
-struct pf_flush_entry {
- SLIST_ENTRY(pf_flush_entry) next;
+struct pf_overload_entry {
+ SLIST_ENTRY(pf_overload_entry) next;
struct pf_addr addr;
sa_family_t af;
uint8_t dir;
- struct pf_rule *rule; /* never dereferenced */
+ struct pf_rule *rule;
};
-SLIST_HEAD(pf_flush_head, pf_flush_entry);
-static VNET_DEFINE(struct pf_flush_head, pf_flushqueue);
-#define V_pf_flushqueue VNET(pf_flushqueue)
-static VNET_DEFINE(struct task, pf_flushtask);
-#define V_pf_flushtask VNET(pf_flushtask)
+SLIST_HEAD(pf_overload_head, pf_overload_entry);
+static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue);
+#define V_pf_overloadqueue VNET(pf_overloadqueue)
+static VNET_DEFINE(struct task, pf_overloadtask);
+#define V_pf_overloadtask VNET(pf_overloadtask)
-static struct mtx pf_flushqueue_mtx;
-#define PF_FLUSHQ_LOCK() mtx_lock(&pf_flushqueue_mtx)
-#define PF_FLUSHQ_UNLOCK() mtx_unlock(&pf_flushqueue_mtx)
+static struct mtx pf_overloadqueue_mtx;
+#define PF_OVERLOADQ_LOCK() mtx_lock(&pf_overloadqueue_mtx)
+#define PF_OVERLOADQ_UNLOCK() mtx_unlock(&pf_overloadqueue_mtx)
VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules);
struct mtx pf_unlnkdrules_mtx;
@@ -279,7 +279,7 @@ static int pf_addr_wrap_neq(struct pf_addr_wrap *,
static struct pf_state *pf_find_state(struct pfi_kif *,
struct pf_state_key_cmp *, u_int);
static int pf_src_connlimit(struct pf_state **);
-static void pf_flush_task(void *c, int pending);
+static void pf_overload_task(void *c, int pending);
static int pf_insert_src_node(struct pf_src_node **,
struct pf_rule *, struct pf_addr *, sa_family_t);
static int pf_purge_expired_states(int);
@@ -461,8 +461,7 @@ pf_check_threshold(struct pf_threshold *threshold)
static int
pf_src_connlimit(struct pf_state **state)
{
- struct pfr_addr p;
- struct pf_flush_entry *pffe;
+ struct pf_overload_entry *pfoe;
int bad = 0;
PF_STATE_LOCK_ASSERT(*state);
@@ -494,69 +493,79 @@ pf_src_connlimit(struct pf_state **state)
if ((*state)->rule.ptr->overload_tbl == NULL)
return (1);
- V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
- if (V_pf_status.debug >= PF_DEBUG_MISC) {
- printf("%s: blocking address ", __func__);
- pf_print_host(&(*state)->src_node->addr, 0,
- (*state)->key[PF_SK_WIRE]->af);
- printf("\n");
- }
-
- bzero(&p, sizeof(p));
- p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
- switch ((*state)->key[PF_SK_WIRE]->af) {
-#ifdef INET
- case AF_INET:
- p.pfra_net = 32;
- p.pfra_ip4addr = (*state)->src_node->addr.v4;
- break;
-#endif /* INET */
-#ifdef INET6
- case AF_INET6:
- p.pfra_net = 128;
- p.pfra_ip6addr = (*state)->src_node->addr.v6;
- break;
-#endif /* INET6 */
- }
-
- pfr_insert_kentry((*state)->rule.ptr->overload_tbl, &p, time_second);
-
- if ((*state)->rule.ptr->flush == 0)
- return (1);
-
- /* Schedule flushing task. */
- pffe = malloc(sizeof(*pffe), M_PFTEMP, M_NOWAIT);
- if (pffe == NULL)
+ /* Schedule overloading and flushing task. */
+ pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
+ if (pfoe == NULL)
return (1); /* too bad :( */
- bcopy(&(*state)->src_node->addr, &pffe->addr, sizeof(pffe->addr));
- pffe->af = (*state)->key[PF_SK_WIRE]->af;
- pffe->dir = (*state)->direction;
- if ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL)
- pffe->rule = NULL;
- else
- pffe->rule = (*state)->rule.ptr;
- PF_FLUSHQ_LOCK();
- SLIST_INSERT_HEAD(&V_pf_flushqueue, pffe, next);
- PF_FLUSHQ_UNLOCK();
- taskqueue_enqueue(taskqueue_swi, &V_pf_flushtask);
+ bcopy(&(*state)->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
+ pfoe->af = (*state)->key[PF_SK_WIRE]->af;
+ pfoe->rule = (*state)->rule.ptr;
+ pfoe->dir = (*state)->direction;
+ PF_OVERLOADQ_LOCK();
+ SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
+ PF_OVERLOADQ_UNLOCK();
+ taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
return (1);
}
static void
-pf_flush_task(void *c, int pending)
+pf_overload_task(void *c, int pending)
{
- struct pf_flush_head queue;
- struct pf_flush_entry *pffe, *pffe1;
+ struct pf_overload_head queue;
+ struct pfr_addr p;
+ struct pf_overload_entry *pfoe, *pfoe1;
uint32_t killed = 0;
- PF_FLUSHQ_LOCK();
- queue = *(struct pf_flush_head *)c;
- SLIST_INIT((struct pf_flush_head *)c);
- PF_FLUSHQ_UNLOCK();
+ PF_OVERLOADQ_LOCK();
+ queue = *(struct pf_overload_head *)c;
+ SLIST_INIT((struct pf_overload_head *)c);
+ PF_OVERLOADQ_UNLOCK();
+
+ bzero(&p, sizeof(p));
+ SLIST_FOREACH(pfoe, &queue, next) {
+ V_pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
+ if (V_pf_status.debug >= PF_DEBUG_MISC) {
+ printf("%s: blocking address ", __func__);
+ pf_print_host(&pfoe->addr, 0, pfoe->af);
+ printf("\n");
+ }
+
+ p.pfra_af = pfoe->af;
+ switch (pfoe->af) {
+#ifdef INET
+ case AF_INET:
+ p.pfra_net = 32;
+ p.pfra_ip4addr = pfoe->addr.v4;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ p.pfra_net = 128;
+ p.pfra_ip6addr = pfoe->addr.v6;
+ break;
+#endif
+ }
+
+ PF_RULES_WLOCK();
+ pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
+ PF_RULES_WUNLOCK();
+ }
+
+ /*
+ * Remove those entries, that don't need flushing.
+ */
+ SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
+ if (pfoe->rule->flush == 0) {
+ SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
+ free(pfoe, M_PFTEMP);
+ } else
+ V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
- V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
+ /* If nothing to flush, return. */
+ if (SLIST_EMPTY(&queue))
+ return;
for (int i = 0; i <= V_pf_hashmask; i++) {
struct pf_idhash *ih = &V_pf_idhash[i];
@@ -566,13 +575,14 @@ pf_flush_task(void *c, int pending)
PF_HASHROW_LOCK(ih);
LIST_FOREACH(s, &ih->states, entry) {
sk = s->key[PF_SK_WIRE];
- SLIST_FOREACH(pffe, &queue, next)
- if (sk->af == pffe->af && (pffe->rule == NULL ||
- pffe->rule == s->rule.ptr) &&
- ((pffe->dir == PF_OUT &&
- PF_AEQ(&pffe->addr, &sk->addr[1], sk->af)) ||
- (pffe->dir == PF_IN &&
- PF_AEQ(&pffe->addr, &sk->addr[0], sk->af)))) {
+ SLIST_FOREACH(pfoe, &queue, next)
+ if (sk->af == pfoe->af &&
+ ((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
+ pfoe->rule == s->rule.ptr) &&
+ ((pfoe->dir == PF_OUT &&
+ PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
+ (pfoe->dir == PF_IN &&
+ PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
s->timeout = PFTM_PURGE;
s->src.state = s->dst.state = TCPS_CLOSED;
killed++;
@@ -580,8 +590,8 @@ pf_flush_task(void *c, int pending)
}
PF_HASHROW_UNLOCK(ih);
}
- SLIST_FOREACH_SAFE(pffe, &queue, next, pffe1)
- free(pffe, M_PFTEMP);
+ SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
+ free(pfoe, M_PFTEMP);
if (V_pf_status.debug >= PF_DEBUG_MISC)
printf("%s: %u states killed", __func__, killed);
}
@@ -742,12 +752,13 @@ pf_initialize()
sizeof(struct pf_mtag), NULL, NULL, pf_mtag_init, NULL,
UMA_ALIGN_PTR, 0);
- /* Send & flush queues. */
+ /* Send & overload+flush queues. */
STAILQ_INIT(&V_pf_sendqueue);
- SLIST_INIT(&V_pf_flushqueue);
- TASK_INIT(&V_pf_flushtask, 0, pf_flush_task, &V_pf_flushqueue);
+ SLIST_INIT(&V_pf_overloadqueue);
+ TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, &V_pf_overloadqueue);
mtx_init(&pf_sendqueue_mtx, "pf send queue", NULL, MTX_DEF);
- mtx_init(&pf_flushqueue_mtx, "pf flush queue", NULL, MTX_DEF);
+ mtx_init(&pf_overloadqueue_mtx, "pf overload/flush queue", NULL,
+ MTX_DEF);
/* Unlinked, but may be referenced rules. */
TAILQ_INIT(&V_pf_unlinked_rules);
@@ -788,7 +799,7 @@ pf_cleanup()
}
mtx_destroy(&pf_sendqueue_mtx);
- mtx_destroy(&pf_flushqueue_mtx);
+ mtx_destroy(&pf_overloadqueue_mtx);
mtx_destroy(&pf_unlnkdrules_mtx);
uma_zdestroy(V_pf_mtag_z);
@@ -1579,6 +1590,19 @@ pf_purge_unlinked_rules()
struct pf_rule *r, *r1;
/*
+ * If we have overloading task pending, then we'd
+ * better skip purging this time. There is a tiny
+ * probability that overloading task references
+ * an already unlinked rule.
+ */
+ PF_OVERLOADQ_LOCK();
+ if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
+ PF_OVERLOADQ_UNLOCK();
+ return;
+ }
+ PF_OVERLOADQ_UNLOCK();
+
+ /*
* Do naive mark-and-sweep garbage collecting of old rules.
* Reference flag is raised by pf_purge_expired_states()
* and pf_purge_expired_src_nodes().
diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c
index c194095..ac22f82 100644
--- a/sys/netpfil/pf/pf_table.c
+++ b/sys/netpfil/pf/pf_table.c
@@ -742,6 +742,8 @@ pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
struct radix_node_head *head = NULL;
struct pfr_kentry *ke;
+ PF_RULES_ASSERT();
+
bzero(&sa, sizeof(sa));
if (ad->pfra_af == AF_INET) {
FILLIN_SIN(sa.sin, ad->pfra_ip4addr);
@@ -929,6 +931,8 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
struct radix_node *rn;
struct radix_node_head *head = NULL;
+ PF_RULES_WASSERT();
+
bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
if (ke->pfrke_af == AF_INET)
head = kt->pfrkt_ip4;