summary refs log tree commit diff stats
path: root/net/netfilter/xt_connlimit.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-01-31 14:31:10 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-01-31 14:31:10 -0800
commit b2fe5fa68642860e7de76167c3111623aa0d5de1 (patch)
tree b7f9b89b7039ecefbc35fe3c8e73a6ff972641dd /net/netfilter/xt_connlimit.c
parent a103950e0dd2058df5e8a8d4a915707bdcf205f0 (diff)
parent a54667f6728c2714a400f3c884727da74b6d1717 (diff)
download op-kernel-dev-b2fe5fa68642860e7de76167c3111623aa0d5de1.zip
op-kernel-dev-b2fe5fa68642860e7de76167c3111623aa0d5de1.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Significantly shrink the core networking routing structures. Result of http://vger.kernel.org/~davem/seoul2017_netdev_keynote.pdf 2) Add netdevsim driver for testing various offloads, from Jakub Kicinski. 3) Support cross-chip FDB operations in DSA, from Vivien Didelot. 4) Add a 2nd listener hash table for TCP, similar to what was done for UDP. From Martin KaFai Lau. 5) Add eBPF based queue selection to tun, from Jason Wang. 6) Lockless qdisc support, from John Fastabend. 7) SCTP stream interleave support, from Xin Long. 8) Smoother TCP receive autotuning, from Eric Dumazet. 9) Lots of erspan tunneling enhancements, from William Tu. 10) Add true function call support to BPF, from Alexei Starovoitov. 11) Add explicit support for GRO HW offloading, from Michael Chan. 12) Support extack generation in more netlink subsystems. From Alexander Aring, Quentin Monnet, and Jakub Kicinski. 13) Add 1000BaseX, flow control, and EEE support to mvneta driver. From Russell King. 14) Add flow table abstraction to netfilter, from Pablo Neira Ayuso. 15) Many improvements and simplifications to the NFP driver bpf JIT, from Jakub Kicinski. 16) Support for ipv6 non-equal cost multipath routing, from Ido Schimmel. 17) Add resource abstraction to devlink, from Arkadi Sharshevsky. 18) Packet scheduler classifier shared filter block support, from Jiri Pirko. 19) Avoid locking in act_csum, from Davide Caratti. 20) devinet_ioctl() simplifications from Al Viro. 21) More TCP bpf improvements from Lawrence Brakmo. 22) Add support for onlink ipv6 route flag, similar to ipv4, from David Ahern.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1925 commits) tls: Add support for encryption using async offload accelerator ip6mr: fix stale iterator net/sched: kconfig: Remove blank help texts openvswitch: meter: Use 64-bit arithmetic instead of 32-bit tcp_nv: fix potential integer overflow in tcpnv_acked r8169: fix RTL8168EP take too long to complete driver initialization. qmi_wwan: Add support for Quectel EP06 rtnetlink: enable IFLA_IF_NETNSID for RTM_NEWLINK ipmr: Fix ptrdiff_t print formatting ibmvnic: Wait for device response when changing MAC qlcnic: fix deadlock bug tcp: release sk_frag.page in tcp_disconnect ipv4: Get the address of interface correctly. net_sched: gen_estimator: fix lockdep splat net: macb: Handle HRESP error net/mlx5e: IPoIB, Fix copy-paste bug in flow steering refactoring ipv6: addrconf: break critical section in addrconf_verify_rtnl() ipv6: change route cache aging logic i40e/i40evf: Update DESC_NEEDED value to reflect larger value bnxt_en: cleanup DIM work on device shutdown ...
Diffstat (limited to 'net/netfilter/xt_connlimit.c')
-rw-r--r-- net/netfilter/xt_connlimit.c 369
1 files changed, 24 insertions, 345 deletions
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index a6214f2..b1b17b9 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -12,292 +12,30 @@
* GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/jhash.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/rbtree.h>
+
#include <linux/module.h>
-#include <linux/random.h>
#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connlimit.h>
+
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
-
-#define CONNLIMIT_SLOTS 256U
-
-#ifdef CONFIG_LOCKDEP
-#define CONNLIMIT_LOCK_SLOTS 8U
-#else
-#define CONNLIMIT_LOCK_SLOTS 256U
-#endif
-
-#define CONNLIMIT_GC_MAX_NODES 8
-
-/* we will save the tuples of all connections we care about */
-struct xt_connlimit_conn {
- struct hlist_node node;
- struct nf_conntrack_tuple tuple;
-};
-
-struct xt_connlimit_rb {
- struct rb_node node;
- struct hlist_head hhead; /* connections/hosts in same subnet */
- union nf_inet_addr addr; /* search key */
-};
-
-static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
-
-struct xt_connlimit_data {
- struct rb_root climit_root[CONNLIMIT_SLOTS];
-};
-
-static u_int32_t connlimit_rnd __read_mostly;
-static struct kmem_cache *connlimit_rb_cachep __read_mostly;
-static struct kmem_cache *connlimit_conn_cachep __read_mostly;
-
-static inline unsigned int connlimit_iphash(__be32 addr)
-{
- return jhash_1word((__force __u32)addr,
- connlimit_rnd) % CONNLIMIT_SLOTS;
-}
-
-static inline unsigned int
-connlimit_iphash6(const union nf_inet_addr *addr)
-{
- return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6),
- connlimit_rnd) % CONNLIMIT_SLOTS;
-}
-
-static inline bool already_closed(const struct nf_conn *conn)
-{
- if (nf_ct_protonum(conn) == IPPROTO_TCP)
- return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
- conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
- else
- return 0;
-}
-
-static int
-same_source(const union nf_inet_addr *addr,
- const union nf_inet_addr *u3, u_int8_t family)
-{
- if (family == NFPROTO_IPV4)
- return ntohl(addr->ip) - ntohl(u3->ip);
-
- return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6));
-}
-
-static bool add_hlist(struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr)
-{
- struct xt_connlimit_conn *conn;
-
- conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
- if (conn == NULL)
- return false;
- conn->tuple = *tuple;
- hlist_add_head(&conn->node, head);
- return true;
-}
-
-static unsigned int check_hlist(struct net *net,
- struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit)
-{
- const struct nf_conntrack_tuple_hash *found;
- struct xt_connlimit_conn *conn;
- struct hlist_node *n;
- struct nf_conn *found_ct;
- unsigned int length = 0;
-
- *addit = true;
-
- /* check the saved connections */
- hlist_for_each_entry_safe(conn, n, head, node) {
- found = nf_conntrack_find_get(net, zone, &conn->tuple);
- if (found == NULL) {
- hlist_del(&conn->node);
- kmem_cache_free(connlimit_conn_cachep, conn);
- continue;
- }
-
- found_ct = nf_ct_tuplehash_to_ctrack(found);
-
- if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
- /*
- * Just to be sure we have it only once in the list.
- * We should not see tuples twice unless someone hooks
- * this into a table without "-p tcp --syn".
- */
- *addit = false;
- } else if (already_closed(found_ct)) {
- /*
- * we do not care about connections which are
- * closed already -> ditch it
- */
- nf_ct_put(found_ct);
- hlist_del(&conn->node);
- kmem_cache_free(connlimit_conn_cachep, conn);
- continue;
- }
-
- nf_ct_put(found_ct);
- length++;
- }
-
- return length;
-}
-
-static void tree_nodes_free(struct rb_root *root,
- struct xt_connlimit_rb *gc_nodes[],
- unsigned int gc_count)
-{
- struct xt_connlimit_rb *rbconn;
-
- while (gc_count) {
- rbconn = gc_nodes[--gc_count];
- rb_erase(&rbconn->node, root);
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- }
-}
-
-static unsigned int
-count_tree(struct net *net, struct rb_root *root,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- u8 family, const struct nf_conntrack_zone *zone)
-{
- struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
- struct rb_node **rbnode, *parent;
- struct xt_connlimit_rb *rbconn;
- struct xt_connlimit_conn *conn;
- unsigned int gc_count;
- bool no_gc = false;
-
- restart:
- gc_count = 0;
- parent = NULL;
- rbnode = &(root->rb_node);
- while (*rbnode) {
- int diff;
- bool addit;
-
- rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node);
-
- parent = *rbnode;
- diff = same_source(addr, &rbconn->addr, family);
- if (diff < 0) {
- rbnode = &((*rbnode)->rb_left);
- } else if (diff > 0) {
- rbnode = &((*rbnode)->rb_right);
- } else {
- /* same source network -> be counted! */
- unsigned int count;
- count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
-
- tree_nodes_free(root, gc_nodes, gc_count);
- if (!addit)
- return count;
-
- if (!add_hlist(&rbconn->hhead, tuple, addr))
- return 0; /* hotdrop */
-
- return count + 1;
- }
-
- if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
- continue;
-
- /* only used for GC on hhead, retval and 'addit' ignored */
- check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
- if (hlist_empty(&rbconn->hhead))
- gc_nodes[gc_count++] = rbconn;
- }
-
- if (gc_count) {
- no_gc = true;
- tree_nodes_free(root, gc_nodes, gc_count);
- /* tree_node_free before new allocation permits
- * allocator to re-use newly free'd object.
- *
- * This is a rare event; in most cases we will find
- * existing node to re-use. (or gc_count is 0).
- */
- goto restart;
- }
-
- /* no match, need to insert new node */
- rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
- if (rbconn == NULL)
- return 0;
-
- conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
- if (conn == NULL) {
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- return 0;
- }
-
- conn->tuple = *tuple;
- rbconn->addr = *addr;
-
- INIT_HLIST_HEAD(&rbconn->hhead);
- hlist_add_head(&conn->node, &rbconn->hhead);
-
- rb_link_node(&rbconn->node, parent, rbnode);
- rb_insert_color(&rbconn->node, root);
- return 1;
-}
-
-static int count_them(struct net *net,
- struct xt_connlimit_data *data,
- const struct nf_conntrack_tuple *tuple,
- const union nf_inet_addr *addr,
- u_int8_t family,
- const struct nf_conntrack_zone *zone)
-{
- struct rb_root *root;
- int count;
- u32 hash;
-
- if (family == NFPROTO_IPV6)
- hash = connlimit_iphash6(addr);
- else
- hash = connlimit_iphash(addr->ip);
- root = &data->climit_root[hash];
-
- spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
-
- count = count_tree(net, root, tuple, addr, family, zone);
-
- spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
-
- return count;
-}
+#include <net/netfilter/nf_conntrack_count.h>
static bool
connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct net *net = xt_net(par);
const struct xt_connlimit_info *info = par->matchinfo;
- union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int connections;
+ u32 key[5];
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL) {
@@ -310,6 +48,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ union nf_inet_addr addr;
unsigned int i;
memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
@@ -317,22 +56,24 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i)
addr.ip6[i] &= info->mask.ip6[i];
+ memcpy(key, &addr, sizeof(addr.ip6));
+ key[4] = zone->id;
} else {
const struct iphdr *iph = ip_hdr(skb);
- addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
+ key[0] = (info->flags & XT_CONNLIMIT_DADDR) ?
iph->daddr : iph->saddr;
- addr.ip &= info->mask.ip;
+ key[0] &= info->mask.ip;
+ key[1] = zone->id;
}
- connections = count_them(net, info->data, tuple_ptr, &addr,
- xt_family(par), zone);
+ connections = nf_conncount_count(net, info->data, key,
+ xt_family(par), tuple_ptr, zone);
if (connections == 0)
/* kmalloc failed, drop it entirely */
goto hotdrop;
- return (connections > info->limit) ^
- !!(info->flags & XT_CONNLIMIT_INVERT);
+ return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);
hotdrop:
par->hotdrop = true;
@@ -342,61 +83,27 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
static int connlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_connlimit_info *info = par->matchinfo;
- unsigned int i;
- int ret;
+ unsigned int keylen;
- net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd));
-
- ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0) {
- pr_info("cannot load conntrack support for "
- "address family %u\n", par->family);
- return ret;
- }
+ keylen = sizeof(u32);
+ if (par->family == NFPROTO_IPV6)
+ keylen += sizeof(struct in6_addr);
+ else
+ keylen += sizeof(struct in_addr);
/* init private data */
- info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
- if (info->data == NULL) {
- nf_ct_netns_put(par->net, par->family);
- return -ENOMEM;
- }
-
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
- info->data->climit_root[i] = RB_ROOT;
+ info->data = nf_conncount_init(par->net, par->family, keylen);
+ if (IS_ERR(info->data))
+ return PTR_ERR(info->data);
return 0;
}
-static void destroy_tree(struct rb_root *r)
-{
- struct xt_connlimit_conn *conn;
- struct xt_connlimit_rb *rbconn;
- struct hlist_node *n;
- struct rb_node *node;
-
- while ((node = rb_first(r)) != NULL) {
- rbconn = rb_entry(node, struct xt_connlimit_rb, node);
-
- rb_erase(node, r);
-
- hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
- kmem_cache_free(connlimit_conn_cachep, conn);
-
- kmem_cache_free(connlimit_rb_cachep, rbconn);
- }
-}
-
static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_connlimit_info *info = par->matchinfo;
- unsigned int i;
-
- nf_ct_netns_put(par->net, par->family);
-
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
- destroy_tree(&info->data->climit_root[i]);
- kfree(info->data);
+ nf_conncount_destroy(par->net, par->family, info->data);
}
static struct xt_match connlimit_mt_reg __read_mostly = {
@@ -413,40 +120,12 @@ static struct xt_match connlimit_mt_reg __read_mostly = {
static int __init connlimit_mt_init(void)
{
- int ret, i;
-
- BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
- BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);
-
- for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i)
- spin_lock_init(&xt_connlimit_locks[i]);
-
- connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
- sizeof(struct xt_connlimit_conn),
- 0, 0, NULL);
- if (!connlimit_conn_cachep)
- return -ENOMEM;
-
- connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
- sizeof(struct xt_connlimit_rb),
- 0, 0, NULL);
- if (!connlimit_rb_cachep) {
- kmem_cache_destroy(connlimit_conn_cachep);
- return -ENOMEM;
- }
- ret = xt_register_match(&connlimit_mt_reg);
- if (ret != 0) {
- kmem_cache_destroy(connlimit_conn_cachep);
- kmem_cache_destroy(connlimit_rb_cachep);
- }
- return ret;
+ return xt_register_match(&connlimit_mt_reg);
}
static void __exit connlimit_mt_exit(void)
{
xt_unregister_match(&connlimit_mt_reg);
- kmem_cache_destroy(connlimit_conn_cachep);
- kmem_cache_destroy(connlimit_rb_cachep);
}
module_init(connlimit_mt_init);
OpenPOWER on IntegriCloud