mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages
Author:     Roman Gushchin <guro@fb.com>
AuthorDate: Fri, 12 Jul 2019 03:56:31 +0000 (20:56 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Fri, 12 Jul 2019 18:05:44 +0000 (11:05 -0700)
Every slab page charged to a non-root memory cgroup has a pointer to the
memory cgroup and holds a reference to it, which protects a non-empty
memory cgroup from being released.  At the same time the page has a
pointer to the corresponding kmem_cache, and also holds a reference to
the kmem_cache.  The kmem_cache, in turn, holds a reference to the cgroup.

So there is clearly some redundancy, which allows us to stop setting the
page->mem_cgroup pointer and instead rely on getting the memcg pointer
indirectly via the kmem_cache.  Further, it will allow changing this
pointer more easily, without the need to go over all charged pages.

So let's stop setting the page->mem_cgroup pointer for slab pages, and
stop using the css refcounter directly for protecting the memory cgroup
from going away.  Instead, rely on the kmem_cache as an intermediate
object.
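
The resulting lookup chain is short enough to sketch. Below is a minimal
userspace mock of the indirection, not the real kernel structures; the
fields are pared down to the pointers this patch cares about, mirroring
memcg_from_slab_page() as added to mm/slab.h further down:

#include <stdio.h>

/* Pared-down stand-ins for the kernel structures involved. */
struct mem_cgroup { const char *name; };

struct kmem_cache {
	struct kmem_cache *root_cache;	/* NULL for a root cache */
	struct mem_cgroup *memcg;	/* set for non-root caches only */
};

struct page {
	struct kmem_cache *slab_cache;	/* set on slab head pages */
};

/*
 * Before: every charged slab page carried page->mem_cgroup directly.
 * After: the memcg is reached through the page's kmem_cache instead.
 */
static struct mem_cgroup *memcg_from_slab_page(struct page *page)
{
	struct kmem_cache *s = page->slab_cache;

	if (s && s->root_cache)		/* non-root cache: has a memcg */
		return s->memcg;
	return NULL;			/* root cache: no memcg */
}

int main(void)
{
	struct mem_cgroup memcg = { "example" };
	struct kmem_cache root = { NULL, NULL };
	struct kmem_cache child = { &root, &memcg };
	struct page page = { &child };

	printf("%s\n", memcg_from_slab_page(&page)->name);
	return 0;
}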

Make sure that vmstats and shrinker lists keep working as before, as
does the /proc/kpagecgroup interface.

Link: http://lkml.kernel.org/r/20190611231813.3148843-10-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Waiman Long <longman@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/list_lru.c
mm/memcontrol.c
mm/slab.h

index 927d85be32f62deb58da1029603a01e3a04e61d9..0f1f6b06b7f365ee65643007ec686a783c5148b6 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/memcontrol.h>
+#include "slab.h"
 
 #ifdef CONFIG_MEMCG_KMEM
 static LIST_HEAD(list_lrus);
@@ -63,7 +64,7 @@ static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
        if (!memcg_kmem_enabled())
                return NULL;
        page = virt_to_head_page(ptr);
-       return page->mem_cgroup;
+       return memcg_from_slab_page(page);
 }
 
 static inline struct list_lru_one *
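
mem_cgroup_from_kmem() resolves the object to its head page first
because, as the comment added to mm/slab.h below explains, only the head
page of a compound slab page has slab_cache set. A hypothetical
userspace mock of that constraint; head_page() and the explicit head
pointer are illustrative stand-ins for the kernel's real compound-page
machinery:

#include <assert.h>
#include <stddef.h>

struct kmem_cache { int id; };

struct page {
	struct page *head;		/* NULL on the head page itself */
	struct kmem_cache *slab_cache;	/* valid on the head page only  */
};

/* Mock of virt_to_head_page(): map any tail page back to its head. */
static struct page *head_page(struct page *p)
{
	return p->head ? p->head : p;
}

int main(void)
{
	struct kmem_cache cache = { 42 };
	struct page pages[4];

	pages[0] = (struct page){ NULL, &cache };	/* head page */
	for (int i = 1; i < 4; i++)
		pages[i] = (struct page){ &pages[0], NULL };	/* tails */

	/* An object on a tail page still resolves to the right cache. */
	assert(head_page(&pages[2])->slab_cache == &cache);
	return 0;
}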
index ce4ce5e7937b9bcf614d66a0c9b68968f6eabf66..fa39e51b3d94aadab475d8e4c91345d26bc80b17 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -486,7 +486,10 @@ ino_t page_cgroup_ino(struct page *page)
        unsigned long ino = 0;
 
        rcu_read_lock();
-       memcg = READ_ONCE(page->mem_cgroup);
+       if (PageHead(page) && PageSlab(page))
+               memcg = memcg_from_slab_page(page);
+       else
+               memcg = READ_ONCE(page->mem_cgroup);
        while (memcg && !(memcg->css.flags & CSS_ONLINE))
                memcg = parent_mem_cgroup(memcg);
        if (memcg)
@@ -2802,9 +2805,6 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
                cancel_charge(memcg, nr_pages);
                return -ENOMEM;
        }
-
-       page->mem_cgroup = memcg;
-
        return 0;
 }
 
@@ -2827,8 +2827,10 @@ int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
        memcg = get_mem_cgroup_from_current();
        if (!mem_cgroup_is_root(memcg)) {
                ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
-               if (!ret)
+               if (!ret) {
+                       page->mem_cgroup = memcg;
                        __SetPageKmemcg(page);
+               }
        }
        css_put(&memcg->css);
        return ret;
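
After this change the memcg can live in two places, and page_cgroup_ino()
in the first hunk above has to pick the right one: slab head pages are
resolved through their kmem_cache, while non-slab kmem pages (and user
pages) still carry page->mem_cgroup. A self-contained sketch of that
dispatch, with mocked-up structures and flag fields in place of the real
page flags:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct mem_cgroup { int id; };
struct kmem_cache { struct mem_cgroup *memcg; };

struct page {
	bool head;			/* mock of PageHead()  */
	bool slab;			/* mock of PageSlab()  */
	struct kmem_cache *slab_cache;	/* slab head pages     */
	struct mem_cgroup *mem_cgroup;	/* all other pages     */
};

/* Sketch of the two-way lookup page_cgroup_ino() now performs. */
static struct mem_cgroup *page_memcg(struct page *page)
{
	/* Slab head pages no longer set page->mem_cgroup ... */
	if (page->head && page->slab)
		return page->slab_cache ? page->slab_cache->memcg : NULL;
	/* ... but non-slab kmem pages (and user pages) still do. */
	return page->mem_cgroup;
}

int main(void)
{
	struct mem_cgroup memcg = { 1 };
	struct kmem_cache cache = { &memcg };
	struct page slab_page = { true, true, &cache, NULL };
	struct page kmem_page = { false, false, NULL, &memcg };

	assert(page_memcg(&slab_page) == &memcg);
	assert(page_memcg(&kmem_page) == &memcg);
	return 0;
}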
index 5d2b8511e6fb39e272e1ce22338b0c90788849b9..7ead47cb9338531d832af29eb1577a0fa0ac4032 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -255,30 +255,67 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
        return s->memcg_params.root_cache;
 }
 
+/*
+ * Expects a pointer to a slab page. Note that a PageSlab() check
+ * isn't sufficient, as it also returns true for tail pages of compound
+ * slab pages, which do not have the slab_cache pointer set.
+ * So this function assumes that the page can pass both the PageHead()
+ * and PageSlab() checks.
+ */
+static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
+{
+       struct kmem_cache *s;
+
+       s = READ_ONCE(page->slab_cache);
+       if (s && !is_root_cache(s))
+               return s->memcg_params.memcg;
+
+       return NULL;
+}
+
+/*
+ * Charge the slab page belonging to a non-root kmem_cache.
+ * Can be called for non-root kmem_caches only.
+ */
 static __always_inline int memcg_charge_slab(struct page *page,
                                             gfp_t gfp, int order,
                                             struct kmem_cache *s)
 {
+       struct mem_cgroup *memcg;
+       struct lruvec *lruvec;
        int ret;
 
-       if (is_root_cache(s))
-               return 0;
-
-       ret = memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
+       memcg = s->memcg_params.memcg;
+       ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
        if (ret)
                return ret;
 
+       lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+       mod_lruvec_state(lruvec, cache_vmstat_idx(s), 1 << order);
+
+       /* transfer try_charge() page references to the kmem_cache */
        percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
+       css_put_many(&memcg->css, 1 << order);
 
        return 0;
 }
 
+/*
+ * Uncharge a slab page belonging to a non-root kmem_cache.
+ * Can be called for non-root kmem_caches only.
+ */
 static __always_inline void memcg_uncharge_slab(struct page *page, int order,
                                                struct kmem_cache *s)
 {
-       if (!is_root_cache(s))
-               percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
-       memcg_kmem_uncharge(page, order);
+       struct mem_cgroup *memcg;
+       struct lruvec *lruvec;
+
+       memcg = s->memcg_params.memcg;
+       lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
+       mod_lruvec_state(lruvec, cache_vmstat_idx(s), -(1 << order));
+       memcg_kmem_uncharge_memcg(page, order, memcg);
+
+       percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
 }
 
 extern void slab_init_memcg_params(struct kmem_cache *);
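
A subtlety in the charge/uncharge hunk above: since slab pages no longer
carry page->mem_cgroup, the charge path cannot use the page-based
mod_lruvec_page_state() and instead resolves the lruvec from the
kmem_cache's memcg explicitly, while root caches (handled in
charge_slab_page() further down) use plain node counters. A toy model of
the two bookkeeping levels; all names below are invented for the sketch:

#include <assert.h>

/* Toy per-node and per-(node, memcg) slab counters. */
static long node_stat;		/* NR_SLAB_* for the whole node */
static long lruvec_stat;	/* NR_SLAB_* for (node, memcg)  */

/* mod_lruvec_state() updates both levels of the hierarchy. */
static void mod_lruvec_state_mock(long delta)
{
	node_stat += delta;
	lruvec_stat += delta;
}

/* mod_node_page_state() updates only the node-level counter,
 * which is all a root cache needs. */
static void mod_node_state_mock(long delta)
{
	node_stat += delta;
}

int main(void)
{
	mod_lruvec_state_mock(8);	/* non-root cache, order-3 page */
	mod_node_state_mock(8);		/* root cache, order-3 page     */
	assert(node_stat == 16 && lruvec_stat == 8);
	return 0;
}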
@@ -314,6 +351,11 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
        return s;
 }
 
+static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
+{
+       return NULL;
+}
+
 static inline int memcg_charge_slab(struct page *page, gfp_t gfp, int order,
                                    struct kmem_cache *s)
 {
@@ -351,18 +393,24 @@ static __always_inline int charge_slab_page(struct page *page,
                                            gfp_t gfp, int order,
                                            struct kmem_cache *s)
 {
-       int ret = memcg_charge_slab(page, gfp, order, s);
-
-       if (!ret)
-               mod_lruvec_page_state(page, cache_vmstat_idx(s), 1 << order);
+       if (is_root_cache(s)) {
+               mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+                                   1 << order);
+               return 0;
+       }
 
-       return ret;
+       return memcg_charge_slab(page, gfp, order, s);
 }
 
 static __always_inline void uncharge_slab_page(struct page *page, int order,
                                               struct kmem_cache *s)
 {
-       mod_lruvec_page_state(page, cache_vmstat_idx(s), -(1 << order));
+       if (is_root_cache(s)) {
+               mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+                                   -(1 << order));
+               return;
+       }
+
        memcg_uncharge_slab(page, order, s);
 }