memcontrol: schedule throttling if we are congested
author	Tejun Heo <tj@kernel.org>
Tue, 3 Jul 2018 15:14:56 +0000 (11:14 -0400)
committer	Jens Axboe <axboe@kernel.dk>
Mon, 9 Jul 2018 15:07:54 +0000 (09:07 -0600)
Memory allocations can induce swapping via kswapd or direct reclaim.  If
the IO is being done for us by kswapd and we never actually go into direct
reclaim ourselves, we may never get scheduled for throttling.  So instead
check whether our cgroup is congested, and if so, schedule the throttling.
Before we return to user space, the throttling code will only throttle us
if we actually required it.
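
As a usage sketch (not part of the patch), an anonymous-fault path ends up
looking roughly like the do_anonymous_page() hunk below: the new helper is a
drop-in replacement for mem_cgroup_try_charge(), and any throttle it schedules
is applied, only if still needed, just before the task returns to user space
(the scheduling/return-to-user machinery comes from the blk-iolatency patches
in this series, not from this patch):

	struct mem_cgroup *memcg;

	/*
	 * Charge as before; additionally, if our blkcg is congested,
	 * mem_cgroup_throttle_swaprate() schedules a throttle against the
	 * swap device's queue via blkcg_schedule_throttle().
	 */
	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
					&memcg, false))
		goto oom_free_page;

	/* ... install the PTE ... */
	mem_cgroup_commit_charge(page, memcg, false, false);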

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/linux/memcontrol.h
include/linux/swap.h
mm/huge_memory.c
mm/memcontrol.c
mm/memory.c
mm/shmem.c
mm/swapfile.c

index 6c6fb116e9258859951b5e96ee6d3da33a274cc4..680d3395fc838269870fb836664826fa296cc9e9 100644 (file)
@@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                          gfp_t gfp_mask, struct mem_cgroup **memcgp,
                          bool compound);
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+                         gfp_t gfp_mask, struct mem_cgroup **memcgp,
+                         bool compound);
 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
                              bool lrucare, bool compound);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
@@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
        return 0;
 }
 
+static inline int mem_cgroup_try_charge_delay(struct page *page,
+                                             struct mm_struct *mm,
+                                             gfp_t gfp_mask,
+                                             struct mem_cgroup **memcgp,
+                                             bool compound)
+{
+       *memcgp = NULL;
+       return 0;
+}
+
 static inline void mem_cgroup_commit_charge(struct page *page,
                                            struct mem_cgroup *memcg,
                                            bool lrucare, bool compound)
index c063443d86381eabac4cde76d723dd0991091271..1a8bd05a335ed787b29142913035eacef7c36273 100644 (file)
@@ -629,7 +629,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 
        return memcg->swappiness;
 }
-
 #else
 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 {
@@ -637,6 +636,16 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 }
 #endif
 
+#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+                                        gfp_t gfp_mask);
+#else
+static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg,
+                                               int node, gfp_t gfp_mask)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
 extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
index 1cd7c1a57a144320b7d1729d7caa6ec93351cc54..b87d5b151db2bff67b2bee3cdeabb4aaf7720945 100644 (file)
@@ -552,7 +552,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
        VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-       if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+       if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) {
                put_page(page);
                count_vm_event(THP_FAULT_FALLBACK);
                return VM_FAULT_FALLBACK;
@@ -1142,7 +1142,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
                pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
                                               vmf->address, page_to_nid(page));
                if (unlikely(!pages[i] ||
-                            mem_cgroup_try_charge(pages[i], vma->vm_mm,
+                            mem_cgroup_try_charge_delay(pages[i], vma->vm_mm,
                                     GFP_KERNEL, &memcg, false))) {
                        if (pages[i])
                                put_page(pages[i]);
@@ -1312,7 +1312,7 @@ alloc:
                goto out;
        }
 
-       if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
+       if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm,
                                        huge_gfp, &memcg, true))) {
                put_page(new_page);
                split_huge_pmd(vma, vmf->pmd, vmf->address);
index e6f0d5ef320aa65d2b65ceed4b202021a84fd49b..64bd28d3538805446a2720040ddafad90627ded3 100644 (file)
@@ -5593,6 +5593,19 @@ out:
        return ret;
 }
 
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+                         gfp_t gfp_mask, struct mem_cgroup **memcgp,
+                         bool compound)
+{
+       struct mem_cgroup *memcg;
+       int ret;
+
+       ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound);
+       memcg = *memcgp;
+       mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask);
+       return ret;
+}
+
 /**
  * mem_cgroup_commit_charge - commit a page charge
  * @page: page to charge
index 7206a634270be3641e2255aa4c9d9eee68daed51..dfe80c574282de9f3f13cdbd13dbb064b057a4a5 100644 (file)
@@ -2503,7 +2503,7 @@ static int wp_page_copy(struct vm_fault *vmf)
                cow_user_page(new_page, old_page, vmf->address, vma);
        }
 
-       if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+       if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
                goto oom_free_new;
 
        __SetPageUptodate(new_page);
@@ -3003,8 +3003,8 @@ int do_swap_page(struct vm_fault *vmf)
                goto out_page;
        }
 
-       if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
-                               &memcg, false)) {
+       if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
+                                       &memcg, false)) {
                ret = VM_FAULT_OOM;
                goto out_page;
        }
@@ -3165,7 +3165,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
        if (!page)
                goto oom;
 
-       if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+       if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
+                                       false))
                goto oom_free_page;
 
        /*
@@ -3661,7 +3662,7 @@ static int do_cow_fault(struct vm_fault *vmf)
        if (!vmf->cow_page)
                return VM_FAULT_OOM;
 
-       if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
+       if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
                                &vmf->memcg, false)) {
                put_page(vmf->cow_page);
                return VM_FAULT_OOM;
index 2cab8440305531f8ab97f3a56c95b91516bcd2ea..6206ca3510cf5fa86550edeb4bfa40baed1dbf4f 100644 (file)
@@ -1239,8 +1239,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
         * the shmem_swaplist_mutex which might hold up shmem_writepage().
         * Charged back to the user (not to caller) when swap account is used.
         */
-       error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
-                       false);
+       error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
+                                           &memcg, false);
        if (error)
                goto out;
        /* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -1712,7 +1712,7 @@ repeat:
                                goto failed;
                }
 
-               error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+               error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
                                false);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
@@ -1818,7 +1818,7 @@ alloc_nohuge:             page = shmem_alloc_and_acct_page(gfp, inode,
                if (sgp == SGP_WRITE)
                        __SetPageReferenced(page);
 
-               error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+               error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
                                PageTransHuge(page));
                if (error)
                        goto unacct;
@@ -2291,7 +2291,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
        __SetPageSwapBacked(page);
        __SetPageUptodate(page);
 
-       ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+       ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
        if (ret)
                goto out_release;
 
index 2cc2972eedaf1088e8a1ea0017f701b18c729494..db4ec8ae1c8c49c65f0406abe9be2d0d445e37e7 100644 (file)
@@ -3731,6 +3731,37 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
        }
 }
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+                                 gfp_t gfp_mask)
+{
+       struct swap_info_struct *si, *next;
+       if (!(gfp_mask & __GFP_IO) || !memcg)
+               return;
+
+       if (!blk_cgroup_congested())
+               return;
+
+       /*
+        * We've already scheduled a throttle, avoid taking the global swap
+        * lock.
+        */
+       if (current->throttle_queue)
+               return;
+
+       spin_lock(&swap_avail_lock);
+       plist_for_each_entry_safe(si, next, &swap_avail_heads[node],
+                                 avail_lists[node]) {
+               if (si->bdev) {
+                       blkcg_schedule_throttle(bdev_get_queue(si->bdev),
+                                               true);
+                       break;
+               }
+       }
+       spin_unlock(&swap_avail_lock);
+}
+#endif
+
 static int __init swapfile_init(void)
 {
        int nid;