From 5d097056c9a017a3b720849efb5432f37acabbac Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:18:21 -0800 Subject: kmemcg: account certain kmem allocations to memcg Mark those kmem allocations that are known to be easily triggered from userspace as __GFP_ACCOUNT/SLAB_ACCOUNT, which makes them accounted to memcg. For the list, see below: - threadinfo - task_struct - task_delay_info - pid - cred - mm_struct - vm_area_struct and vm_region (nommu) - anon_vma and anon_vma_chain - signal_struct - sighand_struct - fs_struct - files_struct - fdtable and fdtable->full_fds_bits - dentry and external_name - inode for all filesystems. This is the most tedious part, because most filesystems override the alloc_inode method. The list is far from complete, so feel free to add more objects. Nevertheless, it should be close to the "account everything" approach and keep most workloads within bounds. Malevolent users will be able to breach the limit, but this was possible even with the former "account everything" approach (simply because it did not, in fact, account everything). [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Cc: Greg Thelen Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 91c2de6..c044d1e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -294,7 +294,7 @@ static int init_inodecache(void) 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD), + SLAB_MEM_SPREAD | SLAB_ACCOUNT), init_once); if (sock_inode_cachep == NULL) return -ENOMEM; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index d81186d..14f45bf 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1500,7 +1500,7 @@ int register_rpc_pipefs(void) rpc_inode_cachep = kmem_cache_create("rpc_inode_cache", sizeof(struct rpc_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), + SLAB_MEM_SPREAD|SLAB_ACCOUNT), init_once); if (!rpc_inode_cachep) return -ENOMEM; -- cgit v1.1 From 9ee11ba4251dddf1b0e507d184b25b1bd7820773 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 14 Jan 2016 15:19:41 -0800 Subject: memcg: do not allow to disable tcp accounting after limit is set There are two bits defined for cg_proto->flags - MEMCG_SOCK_ACTIVATED and MEMCG_SOCK_ACTIVE - both are set in tcp_update_limit, but the former is never cleared while the latter can be cleared by unsetting the limit. This makes it possible to disable tcp socket accounting for new sockets, after it has been enabled, by writing -1 to memory.kmem.tcp.limit_in_bytes, while still guaranteeing that the memcg_socket_limit_enabled static key will be decremented on memcg destruction. This functionality looks dubious, because it is not clear what a use case would be. By enabling tcp accounting, a user accepts the price. If they then find the performance degradation unacceptable, they can always restart their workload with tcp accounting disabled. There does not seem to be any need to flip it while the workload is running. Besides, it contradicts how the kmem accounting API works: writing any value to memory.kmem.limit_in_bytes enables kmem accounting for the cgroup in question, after which it cannot be disabled. 
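For reference, the cache-marking pattern the first patch above applies is the same for any cache whose objects are easily triggered from userspace; a minimal sketch for a hypothetical "foofs" filesystem follows (illustrative only -- foofs and its names are made up; the real hunks for sock_inode_cachep and rpc_inode_cachep are in the diff above):

/* Hypothetical example -- foofs does not exist; it only illustrates the
 * SLAB_ACCOUNT / __GFP_ACCOUNT pattern used by the patch above. */
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/slab.h>

struct foofs_inode_info {
	struct inode vfs_inode;
};

static struct kmem_cache *foofs_inode_cachep;

static int __init foofs_init_inodecache(void)
{
	/* SLAB_ACCOUNT: every object allocated from this cache is charged
	 * to the memory cgroup of the allocating task. */
	foofs_inode_cachep = kmem_cache_create("foofs_inode_cache",
					sizeof(struct foofs_inode_info), 0,
					(SLAB_RECLAIM_ACCOUNT |
					 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
					NULL);
	return foofs_inode_cachep ? 0 : -ENOMEM;
}

/* For one-off allocations, the per-call equivalent is the gfp flag:
 *	buf = kmalloc(len, GFP_KERNEL | __GFP_ACCOUNT);
 */
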
Therefore one might expect that writing -1 to memory.kmem.tcp.limit_in_bytes just enables socket accounting w/o limiting it, which might be useful by itself, but that is not the case. Since this API peculiarity is not documented anywhere, I propose to drop it. This will allow us to simplify the code by dropping cg_proto->flags. Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/ipv4/tcp_memcontrol.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 2379c1b..d07579a 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -48,7 +48,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) percpu_counter_destroy(&cg_proto->sockets_allocated); - if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) + if (cg_proto->active) static_key_slow_dec(&memcg_socket_limit_enabled); } @@ -72,11 +72,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) cg_proto->sysctl_mem[i] = min_t(long, nr_pages, sysctl_tcp_mem[i]); - if (nr_pages == PAGE_COUNTER_MAX) - clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); - else { + if (!cg_proto->active) { /* - * The active bit needs to be written after the static_key + * The active flag needs to be written after the static_key * update. This is what guarantees that the socket activation * function is the last one to run. See sock_update_memcg() for * details, and note that we don't mark any socket as belonging @@ -90,14 +88,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) * We never race with the readers in sock_update_memcg(), * because when this value change, the code to process it is not * patched in yet. - * - * The activated bit is used to guarantee that no two writers - * will do the update in the same memcg. Without that, we can't - * properly shutdown the static key. */ - if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) - static_key_slow_inc(&memcg_socket_limit_enabled); - set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); + static_key_slow_inc(&memcg_socket_limit_enabled); + cg_proto->active = true; } return 0; -- cgit v1.1 From 3d596f7b907b0281b997cf30c92994a71ad0a1a9 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:05 -0800 Subject: net: tcp_memcontrol: protect all tcp_memcontrol calls by jump-label Move the jump-label from sock_update_memcg() and sock_release_memcg() to the call sites, and so eliminate those function calls when socket accounting is not enabled. This also eliminates the need for dummy functions because the calls will be optimized away if the Kconfig options are not enabled. Signed-off-by: Johannes Weiner Acked-by: David S. 
Miller Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/sock.c | 9 ++------- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_ipv4.c | 4 +++- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 5127023..6c5dab0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1507,12 +1507,6 @@ void sk_free(struct sock *sk) } EXPORT_SYMBOL(sk_free); -static void sk_update_clone(const struct sock *sk, struct sock *newsk) -{ - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - sock_update_memcg(newsk); -} - /** * sk_clone_lock - clone a socket, and lock its clone * @sk: the socket to clone @@ -1607,7 +1601,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sk_set_socket(newsk, NULL); newsk->sk_wq = NULL; - sk_update_clone(sk, newsk); + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + sock_update_memcg(newsk); if (newsk->sk_prot->sockets_allocated) sk_sockets_allocated_inc(newsk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7bb1b09..fd17eec 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -422,7 +422,8 @@ void tcp_init_sock(struct sock *sk) sk->sk_rcvbuf = sysctl_tcp_rmem[1]; local_bh_disable(); - sock_update_memcg(sk); + if (mem_cgroup_sockets_enabled) + sock_update_memcg(sk); sk_sockets_allocated_inc(sk); local_bh_enable(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65947c1..eb39e02 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1818,7 +1818,9 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); - sock_release_memcg(sk); + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + sock_release_memcg(sk); } EXPORT_SYMBOL(tcp_v4_destroy_sock); -- cgit v1.1 From af95d7df4059cfeab7e7c244f3564214aada7dad Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:08 -0800 Subject: net: tcp_memcontrol: remove dead per-memcg count of allocated sockets The number of allocated sockets is used for calculations in the soft limit phase, where packets are accepted but the socket is under memory pressure. Since there is no soft limit phase in tcp_memcontrol, and memory pressure is only entered when packets are already dropped, this is actually dead code. Remove it. As this is the last user of parent_cg_proto(), remove that too. Signed-off-by: Johannes Weiner Acked-by: David S. 
Miller Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/ipv4/tcp_memcontrol.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index d07579a..6759e0d 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -32,7 +32,6 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) counter_parent = &parent_cg->memory_allocated; page_counter_init(&cg_proto->memory_allocated, counter_parent); - percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; } @@ -46,8 +45,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) if (!cg_proto) return; - percpu_counter_destroy(&cg_proto->sockets_allocated); - if (cg_proto->active) static_key_slow_dec(&memcg_socket_limit_enabled); -- cgit v1.1 From 80f23124f57c77915a7b4201d8dcba38a38b23f0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:11 -0800 Subject: net: tcp_memcontrol: simplify the per-memcg limit access tcp_memcontrol replicates the global sysctl_mem limit array per cgroup, but it only ever sets these entries to the value of the memory_allocated page_counter limit. Use the latter directly. Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/ipv4/tcp_memcontrol.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 6759e0d..ef4268d 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -21,9 +21,6 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) if (!cg_proto) return 0; - cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0]; - cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1]; - cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2]; cg_proto->memory_pressure = 0; cg_proto->memcg = memcg; @@ -54,7 +51,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) { struct cg_proto *cg_proto; - int i; int ret; cg_proto = tcp_prot.proto_cgroup(memcg); @@ -65,10 +61,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) if (ret) return ret; - for (i = 0; i < 3; i++) - cg_proto->sysctl_mem[i] = min_t(long, nr_pages, - sysctl_tcp_mem[i]); - if (!cg_proto->active) { /* * The active flag needs to be written after the static_key -- cgit v1.1 From e805605c721021879a1469bdae45c6f80bc985f4 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:14 -0800 Subject: net: tcp_memcontrol: sanitize tcp memory accounting callbacks There won't be a tcp control soft limit, so integrating the memcg code into the global skmem limiting scheme complicates things unnecessarily. Replace this with simple and clear charge and uncharge calls--hidden behind a jump label--to account skb memory. Note that this is not purely aesthetic: as a result of shoehorning the per-memcg code into the same memory accounting functions that handle the global level, the old code would compare the per-memcg consumption against the smaller of the per-memcg limit and the global limit. This allowed the total consumption of multiple sockets to exceed the global limit, as long as the individual sockets stayed within bounds. After this change, the code will always compare the per-memcg consumption to the per-memcg limit, and the global consumption to the global limit, and thus close this loophole. 
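Schematically, the new ordering described above checks the two levels independently; a simplified sketch of the control flow (not the literal kernel code -- the real change to __sk_mem_schedule() is in the diff below, which also keeps the suppress_allocation fallback that this sketch omits):

/* Simplified sketch of the per-memcg + global check ordering. */
static int sketch_sk_mem_schedule(struct sock *sk, int amt)
{
	long allocated = sk_memory_allocated_add(sk, amt);

	/* Per-memcg consumption is compared against the per-memcg limit... */
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
	    !mem_cgroup_charge_skmem(sk->sk_cgrp, amt))
		return 0;				/* memcg limit exceeded */

	/* ...and global consumption against the global limits, separately. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {	/* min */
		sk_leave_memory_pressure(sk);
		return 1;
	}
	if (allocated > sk_prot_mem_limits(sk, 1))	/* pressure */
		sk_enter_memory_pressure(sk);
	if (allocated > sk_prot_mem_limits(sk, 2))	/* hard limit */
		return 0;
	return 1;
}
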
Without a soft limit, the per-memcg memory pressure state in sockets is generally questionable. However, we have done it until now, so we continue to enter it when the hard limit is hit, and packets are dropped, to let other sockets in the cgroup know that they shouldn't grow their transmit windows, either. Keep it simple in the new callback model, though, and leave memory pressure lazily when the next packet is accepted (as opposed to doing it synchronously when packets are processed). When packets are dropped, network performance will already be in the toilet, so that should be a reasonable trade-off. As described above, consumption is now checked on the per-memcg level and the global level separately. Likewise, memory pressure states are maintained on both the per-memcg level and the global level, and a socket is considered under pressure when either level asserts as much. Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/sock.c | 26 ++++++++++++++++---------- net/ipv4/tcp_output.c | 7 +++++-- 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 6c5dab0..89ae859 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2084,27 +2084,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) struct proto *prot = sk->sk_prot; int amt = sk_mem_pages(size); long allocated; - int parent_status = UNDER_LIMIT; sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - allocated = sk_memory_allocated_add(sk, amt, &parent_status); + allocated = sk_memory_allocated_add(sk, amt); + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp && + !mem_cgroup_charge_skmem(sk->sk_cgrp, amt)) + goto suppress_allocation; /* Under limit. */ - if (parent_status == UNDER_LIMIT && - allocated <= sk_prot_mem_limits(sk, 0)) { + if (allocated <= sk_prot_mem_limits(sk, 0)) { sk_leave_memory_pressure(sk); return 1; } - /* Under pressure. (we or our parents) */ - if ((parent_status > SOFT_LIMIT) || - allocated > sk_prot_mem_limits(sk, 1)) + /* Under pressure. */ + if (allocated > sk_prot_mem_limits(sk, 1)) sk_enter_memory_pressure(sk); - /* Over hard limit (we or our parents) */ - if ((parent_status == OVER_LIMIT) || - (allocated > sk_prot_mem_limits(sk, 2))) + /* Over hard limit. 
*/ + if (allocated > sk_prot_mem_limits(sk, 2)) goto suppress_allocation; /* guarantee minimum buffer size under pressure */ @@ -2153,6 +2153,9 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt); + return 0; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -2168,6 +2171,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount) sk_memory_allocated_sub(sk, amount); sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount); + if (sk_under_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 412a920..493b489 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2813,13 +2813,16 @@ begin_fwd: */ void sk_forced_mem_schedule(struct sock *sk, int size) { - int amt, status; + int amt; if (size <= sk->sk_forward_alloc) return; amt = sk_mem_pages(size); sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; - sk_memory_allocated_add(sk, amt, &status); + sk_memory_allocated_add(sk, amt); + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + mem_cgroup_charge_skmem(sk->sk_cgrp, amt); } /* Send a FIN. The caller locks the socket for us. -- cgit v1.1 From baac50bbc3cdfd184ebf586b1704edbfcee866df Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:17 -0800 Subject: net: tcp_memcontrol: simplify linkage between socket and page counter There won't be any separate counters for socket memory consumed by protocols other than TCP in the future. Remove the indirection and link sockets directly to their owning memory cgroup. Signed-off-by: Johannes Weiner Reviewed-by: Vladimir Davydov Acked-by: David S. 
Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/sock.c | 52 +++++------------------------------- net/ipv4/tcp_ipv4.c | 7 +---- net/ipv4/tcp_memcontrol.c | 67 ++++++++++++++++++----------------------------- net/ipv4/tcp_output.c | 4 +-- net/ipv6/tcp_ipv6.c | 3 --- 5 files changed, 36 insertions(+), 97 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 89ae859..3535bff 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -195,44 +195,6 @@ bool sk_net_capable(const struct sock *sk, int cap) } EXPORT_SYMBOL(sk_net_capable); - -#ifdef CONFIG_MEMCG_KMEM -int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) -{ - struct proto *proto; - int ret = 0; - - mutex_lock(&proto_list_mutex); - list_for_each_entry(proto, &proto_list, node) { - if (proto->init_cgroup) { - ret = proto->init_cgroup(memcg, ss); - if (ret) - goto out; - } - } - - mutex_unlock(&proto_list_mutex); - return ret; -out: - list_for_each_entry_continue_reverse(proto, &proto_list, node) - if (proto->destroy_cgroup) - proto->destroy_cgroup(memcg); - mutex_unlock(&proto_list_mutex); - return ret; -} - -void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) -{ - struct proto *proto; - - mutex_lock(&proto_list_mutex); - list_for_each_entry_reverse(proto, &proto_list, node) - if (proto->destroy_cgroup) - proto->destroy_cgroup(memcg); - mutex_unlock(&proto_list_mutex); -} -#endif - /* * Each address family might have different locking rules, so we have * one slock key per address family: @@ -1601,7 +1563,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sk_set_socket(newsk, NULL); newsk->sk_wq = NULL; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + if (mem_cgroup_sockets_enabled && sk->sk_memcg) sock_update_memcg(newsk); if (newsk->sk_prot->sockets_allocated) @@ -2089,8 +2051,8 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) allocated = sk_memory_allocated_add(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_cgrp && - !mem_cgroup_charge_skmem(sk->sk_cgrp, amt)) + if (mem_cgroup_sockets_enabled && sk->sk_memcg && + !mem_cgroup_charge_skmem(sk->sk_memcg, amt)) goto suppress_allocation; /* Under limit. 
*/ @@ -2153,8 +2115,8 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt); + if (mem_cgroup_sockets_enabled && sk->sk_memcg) + mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); return 0; } @@ -2171,8 +2133,8 @@ void __sk_mem_reclaim(struct sock *sk, int amount) sk_memory_allocated_sub(sk, amount); sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount); + if (mem_cgroup_sockets_enabled && sk->sk_memcg) + mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); if (sk_under_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eb39e02..c7d1fb5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1819,7 +1819,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk_sockets_allocated_dec(sk); - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + if (mem_cgroup_sockets_enabled && sk->sk_memcg) sock_release_memcg(sk); } EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -2344,11 +2344,6 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_MEMCG_KMEM - .init_cgroup = tcp_init_cgroup, - .destroy_cgroup = tcp_destroy_cgroup, - .proto_cgroup = tcp_proto_cgroup, -#endif .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index ef4268d..e507825 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -8,60 +8,47 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { + struct mem_cgroup *parent = parent_mem_cgroup(memcg); + struct page_counter *counter_parent = NULL; /* * The root cgroup does not use page_counters, but rather, * rely on the data already collected by the network * subsystem */ - struct mem_cgroup *parent = parent_mem_cgroup(memcg); - struct page_counter *counter_parent = NULL; - struct cg_proto *cg_proto, *parent_cg; - - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) + if (memcg == root_mem_cgroup) return 0; - cg_proto->memory_pressure = 0; - cg_proto->memcg = memcg; + memcg->tcp_mem.memory_pressure = 0; - parent_cg = tcp_prot.proto_cgroup(parent); - if (parent_cg) - counter_parent = &parent_cg->memory_allocated; + if (parent) + counter_parent = &parent->tcp_mem.memory_allocated; - page_counter_init(&cg_proto->memory_allocated, counter_parent); + page_counter_init(&memcg->tcp_mem.memory_allocated, counter_parent); return 0; } -EXPORT_SYMBOL(tcp_init_cgroup); void tcp_destroy_cgroup(struct mem_cgroup *memcg) { - struct cg_proto *cg_proto; - - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) + if (memcg == root_mem_cgroup) return; - if (cg_proto->active) + if (memcg->tcp_mem.active) static_key_slow_dec(&memcg_socket_limit_enabled); - } -EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) { - struct cg_proto *cg_proto; int ret; - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) + if (memcg == root_mem_cgroup) return -EINVAL; - ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages); + ret = page_counter_limit(&memcg->tcp_mem.memory_allocated, nr_pages); if (ret) return ret; - if (!cg_proto->active) { + if (!memcg->tcp_mem.active) { /* * The active flag needs to be written after the static_key * update. 
This is what guarantees that the socket activation @@ -79,7 +66,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) * patched in yet. */ static_key_slow_inc(&memcg_socket_limit_enabled); - cg_proto->active = true; + memcg->tcp_mem.active = true; } return 0; @@ -123,32 +110,32 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg); u64 val; switch (cft->private) { case RES_LIMIT: - if (!cg_proto) - return PAGE_COUNTER_MAX; - val = cg_proto->memory_allocated.limit; + if (memcg == root_mem_cgroup) + val = PAGE_COUNTER_MAX; + else + val = memcg->tcp_mem.memory_allocated.limit; val *= PAGE_SIZE; break; case RES_USAGE: - if (!cg_proto) + if (memcg == root_mem_cgroup) val = atomic_long_read(&tcp_memory_allocated); else - val = page_counter_read(&cg_proto->memory_allocated); + val = page_counter_read(&memcg->tcp_mem.memory_allocated); val *= PAGE_SIZE; break; case RES_FAILCNT: - if (!cg_proto) + if (memcg == root_mem_cgroup) return 0; - val = cg_proto->memory_allocated.failcnt; + val = memcg->tcp_mem.memory_allocated.failcnt; break; case RES_MAX_USAGE: - if (!cg_proto) + if (memcg == root_mem_cgroup) return 0; - val = cg_proto->memory_allocated.watermark; + val = memcg->tcp_mem.memory_allocated.watermark; val *= PAGE_SIZE; break; default: @@ -161,19 +148,17 @@ static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg; - struct cg_proto *cg_proto; memcg = mem_cgroup_from_css(of_css(of)); - cg_proto = tcp_prot.proto_cgroup(memcg); - if (!cg_proto) + if (memcg == root_mem_cgroup) return nbytes; switch (of_cft(of)->private) { case RES_MAX_USAGE: - page_counter_reset_watermark(&cg_proto->memory_allocated); + page_counter_reset_watermark(&memcg->tcp_mem.memory_allocated); break; case RES_FAILCNT: - cg_proto->memory_allocated.failcnt = 0; + memcg->tcp_mem.memory_allocated.failcnt = 0; break; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 493b489..fda379c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2821,8 +2821,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size) sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; sk_memory_allocated_add(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_cgrp) - mem_cgroup_charge_skmem(sk->sk_cgrp, amt); + if (mem_cgroup_sockets_enabled && sk->sk_memcg) + mem_cgroup_charge_skmem(sk->sk_memcg, amt); } /* Send a FIN. The caller locks the socket for us. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index db9f1c3..4ad8edb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1889,9 +1889,6 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_MEMCG_KMEM - .proto_cgroup = tcp_proto_cgroup, -#endif .clear_sk = tcp_v6_clear_sk, .diag_destroy = tcp_abort, }; -- cgit v1.1 From 80e95fe0fdcde2812c341ad4209d62dc1a7af53b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:20 -0800 Subject: mm: memcontrol: generalize the socket accounting jump label The unified hierarchy memory controller is going to use this jump label as well to control the networking callbacks. Move it to the memory controller code and give it a more generic name. 
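The memcg half of the charge/uncharge calls used in the previous patches lives in mm/memcontrol.c and is outside this net-limited view; with the tcp_mem linkage introduced above, those helpers could look roughly like this (a sketch under that assumption, not the verbatim implementation, which also has to handle the cgroup2 mode):

/* Sketch only: shows how the per-memcg page counter from tcp_mem could
 * back mem_cgroup_charge_skmem()/mem_cgroup_uncharge_skmem(). */
bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
{
	struct page_counter *fail;

	if (page_counter_try_charge(&memcg->tcp_mem.memory_allocated,
				    nr_pages, &fail)) {
		memcg->tcp_mem.memory_pressure = 0;
		return true;
	}
	/* Over the per-memcg limit: charge anyway so the later uncharge
	 * stays balanced, but report failure and signal pressure. */
	page_counter_charge(&memcg->tcp_mem.memory_allocated, nr_pages);
	memcg->tcp_mem.memory_pressure = 1;
	return false;
}

void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
{
	page_counter_uncharge(&memcg->tcp_mem.memory_allocated, nr_pages);
}
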
Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/sock.c | 5 ----- net/ipv4/tcp_memcontrol.c | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 3535bff..6c1c8bc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -202,11 +202,6 @@ EXPORT_SYMBOL(sk_net_capable); static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -#if defined(CONFIG_MEMCG_KMEM) -struct static_key memcg_socket_limit_enabled; -EXPORT_SYMBOL(memcg_socket_limit_enabled); -#endif - /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index e507825..9a22e2d 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -34,7 +34,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) return; if (memcg->tcp_mem.active) - static_key_slow_dec(&memcg_socket_limit_enabled); + static_key_slow_dec(&memcg_sockets_enabled_key); } static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) @@ -65,7 +65,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) * because when this value change, the code to process it is not * patched in yet. */ - static_key_slow_inc(&memcg_socket_limit_enabled); + static_key_slow_inc(&memcg_sockets_enabled_key); memcg->tcp_mem.active = true; } -- cgit v1.1 From ef12947c9c5a96af549c49f10e5503f0612a397c Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Jan 2016 15:21:34 -0800 Subject: mm: memcontrol: switch to the updated jump-label API The direct use of struct static_key is deprecated. Update the socket and slab accounting code accordingly. Signed-off-by: Johannes Weiner Acked-by: David S. Miller Reported-by: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/ipv4/tcp_memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 9a22e2d..18bc7f7 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -34,7 +34,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg) return; if (memcg->tcp_mem.active) - static_key_slow_dec(&memcg_sockets_enabled_key); + static_branch_dec(&memcg_sockets_enabled_key); } static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) @@ -65,7 +65,7 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) * because when this value change, the code to process it is not * patched in yet. */ - static_key_slow_inc(&memcg_sockets_enabled_key); + static_branch_inc(&memcg_sockets_enabled_key); memcg->tcp_mem.active = true; } -- cgit v1.1
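For reference, the updated jump-label API that the last patch switches to replaces open-coded struct static_key usage with the DEFINE_STATIC_KEY_* definitions and static_branch_*() helpers; a small illustrative sketch (the wrapper functions below are hypothetical, only the jump-label calls themselves are the real API):

#include <linux/jump_label.h>

/* Starts disabled; replaces a bare "struct static_key" definition. */
DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);

static void sockets_accounting_enable(void)	/* hypothetical helper */
{
	/* Replaces static_key_slow_inc(): patches the call sites in. */
	static_branch_inc(&memcg_sockets_enabled_key);
}

static void sockets_accounting_disable(void)	/* hypothetical helper */
{
	/* Replaces static_key_slow_dec(). */
	static_branch_dec(&memcg_sockets_enabled_key);
}

static bool sockets_accounting_active(void)	/* hypothetical helper */
{
	/* Replaces static_key_false(); the branch stays a no-op until the
	 * key has been incremented at least once. */
	return static_branch_unlikely(&memcg_sockets_enabled_key);
}

In the net code above, the mem_cgroup_sockets_enabled test is expected to expand to such a static_branch_unlikely() check on memcg_sockets_enabled_key.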