summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorGlauber Costa <glommer@parallels.com>2012-12-18 14:22:59 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-18 15:02:14 -0800
commit22933152934f30de6f05b600c03f8a08f853a8d2 (patch)
tree1abc838ffd9a130d25a493091dfe631145feea26 /mm
parent7cf2798240a2a2230cb16a391beef98d8a7ad362 (diff)
downloadop-kernel-dev-22933152934f30de6f05b600c03f8a08f853a8d2.zip
op-kernel-dev-22933152934f30de6f05b600c03f8a08f853a8d2.tar.gz
memcg/sl[au]b: shrink dead caches
This means that when we destroy a memcg cache that happened to be empty, those caches may take a lot of time to go away: removing the memcg reference won't destroy them - because there are pending references, and the empty pages will stay there, until a shrinker is called upon for any reason. In this patch, we will call kmem_cache_shrink() for all dead caches that cannot be destroyed because of remaining pages. After shrinking, it is possible that it could be freed. If this is not the case, we'll schedule a lazy worker to keep trying. Signed-off-by: Glauber Costa <glommer@parallels.com> Cc: Christoph Lameter <cl@linux.com> Cc: David Rientjes <rientjes@google.com> Cc: Frederic Weisbecker <fweisbec@redhat.com> Cc: Greg Thelen <gthelen@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: JoonSoo Kim <js1304@gmail.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Michal Hocko <mhocko@suse.cz> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Rik van Riel <riel@redhat.com> Cc: Suleiman Souhlal <suleiman@google.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/memcontrol.c46
1 files changed, 43 insertions, 3 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4b68ec2..7633e0d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3080,7 +3080,27 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
cachep = memcg_params_to_cache(p);
- if (!atomic_read(&cachep->memcg_params->nr_pages))
+ /*
+ * If we get down to 0 after shrink, we could delete right away.
+ * However, memcg_release_pages() already puts us back in the workqueue
+ * in that case. If we proceed deleting, we'll get a dangling
+ * reference, and removing the object from the workqueue in that case
+ * is unnecessary complication. We are not a fast path.
+ *
+ * Note that this case is fundamentally different from racing with
+ * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
+ * kmem_cache_shrink, not only we would be reinserting a dead cache
+ * into the queue, but doing so from inside the worker racing to
+ * destroy it.
+ *
+ * So if we aren't down to zero, we'll just schedule a worker and try
+ * again
+ */
+ if (atomic_read(&cachep->memcg_params->nr_pages) != 0) {
+ kmem_cache_shrink(cachep);
+ if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+ return;
+ } else
kmem_cache_destroy(cachep);
}
@@ -3090,6 +3110,26 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
return;
/*
+ * There are many ways in which we can get here.
+ *
+ * We can get to a memory-pressure situation while the delayed work is
+ * still pending to run. The vmscan shrinkers can then release all
+ * cache memory and get us to destruction. If this is the case, we'll
+ * be executed twice, which is a bug (the second time will execute over
+ * bogus data). In this case, cancelling the work should be fine.
+ *
+ * But we can also get here from the worker itself, if
+ * kmem_cache_shrink is enough to shake all the remaining objects and
+ * get the page count to 0. In this case, we'll deadlock if we try to
+ * cancel the work (the worker runs with an internal lock held, which
+ * is the same lock we would hold for cancel_work_sync().)
+ *
+ * Since we can't possibly know who got us here, just refrain from
+ * running if there is already work pending
+ */
+ if (work_pending(&cachep->memcg_params->destroy))
+ return;
+ /*
* We have to defer the actual destroying to a workqueue, because
* we might currently be in a context that cannot sleep.
*/
@@ -3217,7 +3257,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
* set, so flip it down to guarantee we are in control.
*/
c->memcg_params->dead = false;
- cancel_delayed_work_sync(&c->memcg_params->destroy);
+ cancel_work_sync(&c->memcg_params->destroy);
kmem_cache_destroy(c);
}
mutex_unlock(&set_limit_mutex);
@@ -3242,7 +3282,7 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
cachep = memcg_params_to_cache(params);
cachep->memcg_params->dead = true;
INIT_WORK(&cachep->memcg_params->destroy,
- kmem_cache_destroy_work_func);
+ kmem_cache_destroy_work_func);
schedule_work(&cachep->memcg_params->destroy);
}
mutex_unlock(&memcg->slab_caches_mutex);
OpenPOWER on IntegriCloud