diff options
Diffstat (limited to 'net/netfilter/xt_connlimit.c')
-rw-r--r-- | net/netfilter/xt_connlimit.c | 311 |
1 files changed, 242 insertions, 69 deletions
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index c40b269..458464e 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -19,6 +19,7 @@ #include <linux/jhash.h> #include <linux/slab.h> #include <linux/list.h> +#include <linux/rbtree.h> #include <linux/module.h> #include <linux/random.h> #include <linux/skbuff.h> @@ -31,6 +32,10 @@ #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_zones.h> +#define CONNLIMIT_SLOTS 32 +#define CONNLIMIT_LOCK_SLOTS 32 +#define CONNLIMIT_GC_MAX_NODES 8 + /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { struct hlist_node node; @@ -38,16 +43,26 @@ struct xt_connlimit_conn { union nf_inet_addr addr; }; +struct xt_connlimit_rb { + struct rb_node node; + struct hlist_head hhead; /* connections/hosts in same subnet */ + union nf_inet_addr addr; /* search key */ +}; + struct xt_connlimit_data { - struct hlist_head iphash[256]; - spinlock_t lock; + struct rb_root climit_root4[CONNLIMIT_SLOTS]; + struct rb_root climit_root6[CONNLIMIT_SLOTS]; + spinlock_t locks[CONNLIMIT_LOCK_SLOTS]; }; static u_int32_t connlimit_rnd __read_mostly; +static struct kmem_cache *connlimit_rb_cachep __read_mostly; +static struct kmem_cache *connlimit_conn_cachep __read_mostly; static inline unsigned int connlimit_iphash(__be32 addr) { - return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF; + return jhash_1word((__force __u32)addr, + connlimit_rnd) % CONNLIMIT_SLOTS; } static inline unsigned int @@ -60,7 +75,8 @@ connlimit_iphash6(const union nf_inet_addr *addr, for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) res.ip6[i] = addr->ip6[i] & mask->ip6[i]; - return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF; + return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), + connlimit_rnd) % CONNLIMIT_SLOTS; } static inline bool already_closed(const struct nf_conn *conn) @@ -72,13 +88,14 @@ static inline bool already_closed(const struct nf_conn *conn) return 0; } -static inline unsigned int +static int same_source_net(const union nf_inet_addr *addr, const union nf_inet_addr *mask, const union nf_inet_addr *u3, u_int8_t family) { if (family == NFPROTO_IPV4) { - return (addr->ip & mask->ip) == (u3->ip & mask->ip); + return ntohl(addr->ip & mask->ip) - + ntohl(u3->ip & mask->ip); } else { union nf_inet_addr lh, rh; unsigned int i; @@ -88,89 +105,205 @@ same_source_net(const union nf_inet_addr *addr, rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; } - return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)) == 0; + return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)); } } -static int count_them(struct net *net, - struct xt_connlimit_data *data, +static bool add_hlist(struct hlist_head *head, const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, - const union nf_inet_addr *mask, - u_int8_t family) + const union nf_inet_addr *addr) +{ + struct xt_connlimit_conn *conn; + + conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); + if (conn == NULL) + return false; + conn->tuple = *tuple; + conn->addr = *addr; + hlist_add_head(&conn->node, head); + return true; +} + +static unsigned int check_hlist(struct net *net, + struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + bool *addit) { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; struct hlist_node *n; struct nf_conn *found_ct; - struct hlist_head *hash; - bool addit = true; - int matches = 0; - - if (family == NFPROTO_IPV6) - hash = &data->iphash[connlimit_iphash6(addr, mask)]; - else - hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; + unsigned int length = 0; + *addit = true; rcu_read_lock(); /* check the saved connections */ - hlist_for_each_entry_safe(conn, n, hash, node) { + hlist_for_each_entry_safe(conn, n, head, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); - found_ct = NULL; + if (found == NULL) { + hlist_del(&conn->node); + kmem_cache_free(connlimit_conn_cachep, conn); + continue; + } - if (found != NULL) - found_ct = nf_ct_tuplehash_to_ctrack(found); + found_ct = nf_ct_tuplehash_to_ctrack(found); - if (found_ct != NULL && - nf_ct_tuple_equal(&conn->tuple, tuple) && - !already_closed(found_ct)) + if (nf_ct_tuple_equal(&conn->tuple, tuple)) { /* * Just to be sure we have it only once in the list. * We should not see tuples twice unless someone hooks * this into a table without "-p tcp --syn". */ - addit = false; - - if (found == NULL) { - /* this one is gone */ - hlist_del(&conn->node); - kfree(conn); - continue; - } - - if (already_closed(found_ct)) { + *addit = false; + } else if (already_closed(found_ct)) { /* * we do not care about connections which are * closed already -> ditch it */ nf_ct_put(found_ct); hlist_del(&conn->node); - kfree(conn); + kmem_cache_free(connlimit_conn_cachep, conn); continue; } - if (same_source_net(addr, mask, &conn->addr, family)) - /* same source network -> be counted! */ - ++matches; nf_ct_put(found_ct); + length++; } rcu_read_unlock(); - if (addit) { - /* save the new connection in our list */ - conn = kmalloc(sizeof(*conn), GFP_ATOMIC); - if (conn == NULL) - return -ENOMEM; - conn->tuple = *tuple; - conn->addr = *addr; - hlist_add_head(&conn->node, hash); - ++matches; + return length; +} + +static void tree_nodes_free(struct rb_root *root, + struct xt_connlimit_rb *gc_nodes[], + unsigned int gc_count) +{ + struct xt_connlimit_rb *rbconn; + + while (gc_count) { + rbconn = gc_nodes[--gc_count]; + rb_erase(&rbconn->node, root); + kmem_cache_free(connlimit_rb_cachep, rbconn); + } +} + +static unsigned int +count_tree(struct net *net, struct rb_root *root, + const struct nf_conntrack_tuple *tuple, + const union nf_inet_addr *addr, const union nf_inet_addr *mask, + u8 family) +{ + struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; + struct rb_node **rbnode, *parent; + struct xt_connlimit_rb *rbconn; + struct xt_connlimit_conn *conn; + unsigned int gc_count; + bool no_gc = false; + + restart: + gc_count = 0; + parent = NULL; + rbnode = &(root->rb_node); + while (*rbnode) { + int diff; + bool addit; + + rbconn = container_of(*rbnode, struct xt_connlimit_rb, node); + + parent = *rbnode; + diff = same_source_net(addr, mask, &rbconn->addr, family); + if (diff < 0) { + rbnode = &((*rbnode)->rb_left); + } else if (diff > 0) { + rbnode = &((*rbnode)->rb_right); + } else { + /* same source network -> be counted! */ + unsigned int count; + count = check_hlist(net, &rbconn->hhead, tuple, &addit); + + tree_nodes_free(root, gc_nodes, gc_count); + if (!addit) + return count; + + if (!add_hlist(&rbconn->hhead, tuple, addr)) + return 0; /* hotdrop */ + + return count + 1; + } + + if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) + continue; + + /* only used for GC on hhead, retval and 'addit' ignored */ + check_hlist(net, &rbconn->hhead, tuple, &addit); + if (hlist_empty(&rbconn->hhead)) + gc_nodes[gc_count++] = rbconn; + } + + if (gc_count) { + no_gc = true; + tree_nodes_free(root, gc_nodes, gc_count); + /* tree_node_free before new allocation permits + * allocator to re-use newly free'd object. + * + * This is a rare event; in most cases we will find + * existing node to re-use. (or gc_count is 0). + */ + goto restart; + } + + /* no match, need to insert new node */ + rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC); + if (rbconn == NULL) + return 0; + + conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); + if (conn == NULL) { + kmem_cache_free(connlimit_rb_cachep, rbconn); + return 0; + } + + conn->tuple = *tuple; + conn->addr = *addr; + rbconn->addr = *addr; + + INIT_HLIST_HEAD(&rbconn->hhead); + hlist_add_head(&conn->node, &rbconn->hhead); + + rb_link_node(&rbconn->node, parent, rbnode); + rb_insert_color(&rbconn->node, root); + return 1; +} + +static int count_them(struct net *net, + struct xt_connlimit_data *data, + const struct nf_conntrack_tuple *tuple, + const union nf_inet_addr *addr, + const union nf_inet_addr *mask, + u_int8_t family) +{ + struct rb_root *root; + int count; + u32 hash; + + if (family == NFPROTO_IPV6) { + hash = connlimit_iphash6(addr, mask); + root = &data->climit_root6[hash]; + } else { + hash = connlimit_iphash(addr->ip & mask->ip); + root = &data->climit_root4[hash]; } - return matches; + spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); + + count = count_tree(net, root, tuple, addr, mask, family); + + spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); + + return count; } static bool @@ -183,7 +316,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct nf_conntrack_tuple *tuple_ptr = &tuple; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; - int connections; + unsigned int connections; ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) @@ -202,12 +335,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) iph->daddr : iph->saddr; } - spin_lock_bh(&info->data->lock); connections = count_them(net, info->data, tuple_ptr, &addr, &info->mask, par->family); - spin_unlock_bh(&info->data->lock); - - if (connections < 0) + if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; @@ -247,29 +377,47 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) return -ENOMEM; } - spin_lock_init(&info->data->lock); - for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) - INIT_HLIST_HEAD(&info->data->iphash[i]); + for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i) + spin_lock_init(&info->data->locks[i]); + + for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) + info->data->climit_root4[i] = RB_ROOT; + for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) + info->data->climit_root6[i] = RB_ROOT; return 0; } -static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) +static void destroy_tree(struct rb_root *r) { - const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; + struct xt_connlimit_rb *rbconn; struct hlist_node *n; - struct hlist_head *hash = info->data->iphash; + struct rb_node *node; + + while ((node = rb_first(r)) != NULL) { + rbconn = container_of(node, struct xt_connlimit_rb, node); + + rb_erase(node, r); + + hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) + kmem_cache_free(connlimit_conn_cachep, conn); + + kmem_cache_free(connlimit_rb_cachep, rbconn); + } +} + +static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_connlimit_info *info = par->matchinfo; unsigned int i; nf_ct_l3proto_module_put(par->family); - for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - hlist_for_each_entry_safe(conn, n, &hash[i], node) { - hlist_del(&conn->node); - kfree(conn); - } - } + for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) + destroy_tree(&info->data->climit_root4[i]); + for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) + destroy_tree(&info->data->climit_root6[i]); kfree(info->data); } @@ -287,12 +435,37 @@ static struct xt_match connlimit_mt_reg __read_mostly = { static int __init connlimit_mt_init(void) { - return xt_register_match(&connlimit_mt_reg); + int ret; + + BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS); + BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0); + + connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn", + sizeof(struct xt_connlimit_conn), + 0, 0, NULL); + if (!connlimit_conn_cachep) + return -ENOMEM; + + connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb", + sizeof(struct xt_connlimit_rb), + 0, 0, NULL); + if (!connlimit_rb_cachep) { + kmem_cache_destroy(connlimit_conn_cachep); + return -ENOMEM; + } + ret = xt_register_match(&connlimit_mt_reg); + if (ret != 0) { + kmem_cache_destroy(connlimit_conn_cachep); + kmem_cache_destroy(connlimit_rb_cachep); + } + return ret; } static void __exit connlimit_mt_exit(void) { xt_unregister_match(&connlimit_mt_reg); + kmem_cache_destroy(connlimit_conn_cachep); + kmem_cache_destroy(connlimit_rb_cachep); } module_init(connlimit_mt_init); |