memcg, slab: do not schedule cache destruction when last page goes away

author    Vladimir Davydov <vdavydov@parallels.com>
          Wed, 4 Jun 2014 23:07:37 +0000 (16:07 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Wed, 4 Jun 2014 23:54:01 +0000 (16:54 -0700)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index 5155d09e749d216e4b1a6571842d006d89c567a9..087a453141810dcbf519d4e6810f9f78adff42cb 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -509,7 +509,6 @@ __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
  int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
  void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
  
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
  int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
  
  /**
diff --git a/include/linux/slab.h b/include/linux/slab.h

index a6aab2c0dfc59cee228f3c6059f7f3eb78d7e1e9..905541dd37783e4bb1cd78eb7e6945b1d5b5d788 100644 (file)
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -524,7 +524,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
   * @memcg: pointer to the memcg this cache belongs to
   * @list: list_head for the list of all caches in this memcg
   * @root_cache: pointer to the global, root cache, this cache was derived from
- * @dead: set to true after the memcg dies; the cache may still be around.
   * @nr_pages: number of pages that belongs to this cache.
   * @destroy: worker to be called whenever we are ready, or believe we may be
   *           ready, to destroy this cache.
@@ -540,7 +539,6 @@ struct memcg_cache_params {
                         struct mem_cgroup *memcg;
                         struct list_head list;
                         struct kmem_cache *root_cache;
-                       bool dead;
                         atomic_t nr_pages;
                         struct work_struct destroy;
                 };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 9f4ff49c6adda5145991e5caa3d32259e5f4dc53..6b1c45ced7330db7e6502493aa47df7dedd0732f 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3277,60 +3277,11 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
  
         cachep = memcg_params_to_cache(p);
  
-       /*
-        * If we get down to 0 after shrink, we could delete right away.
-        * However, memcg_release_pages() already puts us back in the workqueue
-        * in that case. If we proceed deleting, we'll get a dangling
-        * reference, and removing the object from the workqueue in that case
-        * is unnecessary complication. We are not a fast path.
-        *
-        * Note that this case is fundamentally different from racing with
-        * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
-        * kmem_cache_shrink, not only we would be reinserting a dead cache
-        * into the queue, but doing so from inside the worker racing to
-        * destroy it.
-        *
-        * So if we aren't down to zero, we'll just schedule a worker and try
-        * again
-        */
-       if (atomic_read(&cachep->memcg_params->nr_pages) != 0)
-               kmem_cache_shrink(cachep);
-       else
+       kmem_cache_shrink(cachep);
+       if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
                 kmem_cache_destroy(cachep);
  }
  
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
-{
-       if (!cachep->memcg_params->dead)
-               return;
-
-       /*
-        * There are many ways in which we can get here.
-        *
-        * We can get to a memory-pressure situation while the delayed work is
-        * still pending to run. The vmscan shrinkers can then release all
-        * cache memory and get us to destruction. If this is the case, we'll
-        * be executed twice, which is a bug (the second time will execute over
-        * bogus data). In this case, cancelling the work should be fine.
-        *
-        * But we can also get here from the worker itself, if
-        * kmem_cache_shrink is enough to shake all the remaining objects and
-        * get the page count to 0. In this case, we'll deadlock if we try to
-        * cancel the work (the worker runs with an internal lock held, which
-        * is the same lock we would hold for cancel_work_sync().)
-        *
-        * Since we can't possibly know who got us here, just refrain from
-        * running if there is already work pending
-        */
-       if (work_pending(&cachep->memcg_params->destroy))
-               return;
-       /*
-        * We have to defer the actual destroying to a workqueue, because
-        * we might currently be in a context that cannot sleep.
-        */
-       schedule_work(&cachep->memcg_params->destroy);
-}
-
  int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
  {
         struct kmem_cache *c;
@@ -3356,16 +3307,7 @@ int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
                  * We will now manually delete the caches, so to avoid races
                  * we need to cancel all pending destruction workers and
                  * proceed with destruction ourselves.
-                *
-                * kmem_cache_destroy() will call kmem_cache_shrink internally,
-                * and that could spawn the workers again: it is likely that
-                * the cache still have active pages until this very moment.
-                * This would lead us back to mem_cgroup_destroy_cache.
-                *
-                * But that will not execute at all if the "dead" flag is not
-                * set, so flip it down to guarantee we are in control.
                  */
-               c->memcg_params->dead = false;
                 cancel_work_sync(&c->memcg_params->destroy);
                 kmem_cache_destroy(c);
  
@@ -3387,7 +3329,6 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
         mutex_lock(&memcg->slab_caches_mutex);
         list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
                 cachep = memcg_params_to_cache(params);
-               cachep->memcg_params->dead = true;
                 schedule_work(&cachep->memcg_params->destroy);
         }
         mutex_unlock(&memcg->slab_caches_mutex);
diff --git a/mm/slab.h b/mm/slab.h

index d85d59803d5f701abd8d14995850613e159a3db8..b59447ac45337329612bda515f2742583623e0f2 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -129,11 +129,8 @@ static inline void memcg_bind_pages(struct kmem_cache *s, int order)
  
  static inline void memcg_release_pages(struct kmem_cache *s, int order)
  {
-       if (is_root_cache(s))
-               return;
-
-       if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
-               mem_cgroup_destroy_cache(s);
+       if (!is_root_cache(s))
+               atomic_sub(1 << order, &s->memcg_params->nr_pages);
  }
  
  static inline bool slab_equal_or_root(struct kmem_cache *s,
author	Vladimir Davydov <vdavydov@parallels.com>
	Wed, 4 Jun 2014 23:07:37 +0000 (16:07 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 4 Jun 2014 23:54:01 +0000 (16:54 -0700)
Files changed:
  include/linux/memcontrol.h
  include/linux/slab.h
  mm/memcontrol.c
  mm/slab.h