summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorVladimir Davydov <vdavydov@virtuozzo.com>2016-01-14 15:19:41 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-01-14 16:00:49 -0800
commit9ee11ba4251dddf1b0e507d184b25b1bd7820773 (patch)
treee07255c31c668b5a6cb7dc04bf46dd3542a8eb51 /net
parent316bda0e6cc5f36f94b4af8bded16d642c90ad75 (diff)
downloadop-kernel-dev-9ee11ba4251dddf1b0e507d184b25b1bd7820773.zip
op-kernel-dev-9ee11ba4251dddf1b0e507d184b25b1bd7820773.tar.gz
memcg: do not allow to disable tcp accounting after limit is set
There are two bits defined for cg_proto->flags - MEMCG_SOCK_ACTIVATED and MEMCG_SOCK_ACTIVE - both are set in tcp_update_limit, but the former is never cleared while the latter can be cleared by unsetting the limit. This allows to disable tcp socket accounting for new sockets after it was enabled by writing -1 to memory.kmem.tcp.limit_in_bytes while still guaranteeing that memcg_socket_limit_enabled static key will be decremented on memcg destruction. This functionality looks dubious, because it is not clear what a use case would be. By enabling tcp accounting a user accepts the price. If they then find the performance degradation unacceptable, they can always restart their workload with tcp accounting disabled. It does not seem there is any need to flip it while the workload is running. Besides, it contradicts to how kmem accounting API works: writing whatever to memory.kmem.limit_in_bytes enables kmem accounting for the cgroup in question, after which it cannot be disabled. Therefore one might expect that writing -1 to memory.kmem.tcp.limit_in_bytes just enables socket accounting w/o limiting it, which might be useful by itself, but it isn't true. Since this API peculiarity is not documented anywhere, I propose to drop it. This will allow to simplify the code by dropping cg_proto->flags. Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp_memcontrol.c17
1 files changed, 5 insertions, 12 deletions
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 2379c1b..d07579a 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -48,7 +48,7 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
percpu_counter_destroy(&cg_proto->sockets_allocated);
- if (test_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
+ if (cg_proto->active)
static_key_slow_dec(&memcg_socket_limit_enabled);
}
@@ -72,11 +72,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
sysctl_tcp_mem[i]);
- if (nr_pages == PAGE_COUNTER_MAX)
- clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- else {
+ if (!cg_proto->active) {
/*
- * The active bit needs to be written after the static_key
+ * The active flag needs to be written after the static_key
* update. This is what guarantees that the socket activation
* function is the last one to run. See sock_update_memcg() for
* details, and note that we don't mark any socket as belonging
@@ -90,14 +88,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
* We never race with the readers in sock_update_memcg(),
* because when this value change, the code to process it is not
* patched in yet.
- *
- * The activated bit is used to guarantee that no two writers
- * will do the update in the same memcg. Without that, we can't
- * properly shutdown the static key.
*/
- if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
- static_key_slow_inc(&memcg_socket_limit_enabled);
- set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
+ static_key_slow_inc(&memcg_socket_limit_enabled);
+ cg_proto->active = true;
}
return 0;
OpenPOWER on IntegriCloud