mm: vmscan: replace shrink_node() loop with a retry jump

author Johannes Weiner <hannes@cmpxchg.org>

Sun, 1 Dec 2019 01:55:43 +0000 (17:55 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sun, 1 Dec 2019 20:59:07 +0000 (12:59 -0800)
author Johannes Weiner <hannes@cmpxchg.org>
Sun, 1 Dec 2019 01:55:43 +0000 (17:55 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 1 Dec 2019 20:59:07 +0000 (12:59 -0800)
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 9281f40eeb455b474d7adadabbe731f748a90944..360aab17d0e884eeb31934f495ee2cb4c500c485 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2729,144 +2729,143 @@ static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
  static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
  {
         struct reclaim_state *reclaim_state = current->reclaim_state;
+       struct mem_cgroup *root = sc->target_mem_cgroup;
         unsigned long nr_reclaimed, nr_scanned;
         bool reclaimable = false;
+       struct mem_cgroup *memcg;
+again:
+       memset(&sc->nr, 0, sizeof(sc->nr));
  
-       do {
-               struct mem_cgroup *root = sc->target_mem_cgroup;
-               struct mem_cgroup *memcg;
-
-               memset(&sc->nr, 0, sizeof(sc->nr));
-
-               nr_reclaimed = sc->nr_reclaimed;
-               nr_scanned = sc->nr_scanned;
+       nr_reclaimed = sc->nr_reclaimed;
+       nr_scanned = sc->nr_scanned;
  
-               memcg = mem_cgroup_iter(root, NULL, NULL);
-               do {
-                       unsigned long reclaimed;
-                       unsigned long scanned;
+       memcg = mem_cgroup_iter(root, NULL, NULL);
+       do {
+               unsigned long reclaimed;
+               unsigned long scanned;
  
-                       switch (mem_cgroup_protected(root, memcg)) {
-                       case MEMCG_PROT_MIN:
-                               /*
-                                * Hard protection.
-                                * If there is no reclaimable memory, OOM.
-                                */
+               switch (mem_cgroup_protected(root, memcg)) {
+               case MEMCG_PROT_MIN:
+                       /*
+                        * Hard protection.
+                        * If there is no reclaimable memory, OOM.
+                        */
+                       continue;
+               case MEMCG_PROT_LOW:
+                       /*
+                        * Soft protection.
+                        * Respect the protection only as long as
+                        * there is an unprotected supply
+                        * of reclaimable memory from other cgroups.
+                        */
+                       if (!sc->memcg_low_reclaim) {
+                               sc->memcg_low_skipped = 1;
                                 continue;
-                       case MEMCG_PROT_LOW:
-                               /*
-                                * Soft protection.
-                                * Respect the protection only as long as
-                                * there is an unprotected supply
-                                * of reclaimable memory from other cgroups.
-                                */
-                               if (!sc->memcg_low_reclaim) {
-                                       sc->memcg_low_skipped = 1;
-                                       continue;
-                               }
-                               memcg_memory_event(memcg, MEMCG_LOW);
-                               break;
-                       case MEMCG_PROT_NONE:
-                               /*
-                                * All protection thresholds breached. We may
-                                * still choose to vary the scan pressure
-                                * applied based on by how much the cgroup in
-                                * question has exceeded its protection
-                                * thresholds (see get_scan_count).
-                                */
-                               break;
                         }
+                       memcg_memory_event(memcg, MEMCG_LOW);
+                       break;
+               case MEMCG_PROT_NONE:
+                       /*
+                        * All protection thresholds breached. We may
+                        * still choose to vary the scan pressure
+                        * applied based on by how much the cgroup in
+                        * question has exceeded its protection
+                        * thresholds (see get_scan_count).
+                        */
+                       break;
+               }
  
-                       reclaimed = sc->nr_reclaimed;
-                       scanned = sc->nr_scanned;
-                       shrink_node_memcg(pgdat, memcg, sc);
-
-                       shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
-                                       sc->priority);
-
-                       /* Record the group's reclaim efficiency */
-                       vmpressure(sc->gfp_mask, memcg, false,
-                                  sc->nr_scanned - scanned,
-                                  sc->nr_reclaimed - reclaimed);
-
-               } while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
+               reclaimed = sc->nr_reclaimed;
+               scanned = sc->nr_scanned;
+               shrink_node_memcg(pgdat, memcg, sc);
  
-               if (reclaim_state) {
-                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-                       reclaim_state->reclaimed_slab = 0;
-               }
+               shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
+                           sc->priority);
  
-               /* Record the subtree's reclaim efficiency */
-               vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
-                          sc->nr_scanned - nr_scanned,
-                          sc->nr_reclaimed - nr_reclaimed);
+               /* Record the group's reclaim efficiency */
+               vmpressure(sc->gfp_mask, memcg, false,
+                          sc->nr_scanned - scanned,
+                          sc->nr_reclaimed - reclaimed);
  
-               if (sc->nr_reclaimed - nr_reclaimed)
-                       reclaimable = true;
+       } while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
  
-               if (current_is_kswapd()) {
-                       /*
-                        * If reclaim is isolating dirty pages under writeback,
-                        * it implies that the long-lived page allocation rate
-                        * is exceeding the page laundering rate. Either the
-                        * global limits are not being effective at throttling
-                        * processes due to the page distribution throughout
-                        * zones or there is heavy usage of a slow backing
-                        * device. The only option is to throttle from reclaim
-                        * context which is not ideal as there is no guarantee
-                        * the dirtying process is throttled in the same way
-                        * balance_dirty_pages() manages.
-                        *
-                        * Once a node is flagged PGDAT_WRITEBACK, kswapd will
-                        * count the number of pages under pages flagged for
-                        * immediate reclaim and stall if any are encountered
-                        * in the nr_immediate check below.
-                        */
-                       if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
-                               set_bit(PGDAT_WRITEBACK, &pgdat->flags);
+       if (reclaim_state) {
+               sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+               reclaim_state->reclaimed_slab = 0;
+       }
  
-                       /*
-                        * Tag a node as congested if all the dirty pages
-                        * scanned were backed by a congested BDI and
-                        * wait_iff_congested will stall.
-                        */
-                       if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-                               set_bit(PGDAT_CONGESTED, &pgdat->flags);
+       /* Record the subtree's reclaim efficiency */
+       vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+                  sc->nr_scanned - nr_scanned,
+                  sc->nr_reclaimed - nr_reclaimed);
  
-                       /* Allow kswapd to start writing pages during reclaim.*/
-                       if (sc->nr.unqueued_dirty == sc->nr.file_taken)
-                               set_bit(PGDAT_DIRTY, &pgdat->flags);
+       if (sc->nr_reclaimed - nr_reclaimed)
+               reclaimable = true;
  
-                       /*
-                        * If kswapd scans pages marked marked for immediate
-                        * reclaim and under writeback (nr_immediate), it
-                        * implies that pages are cycling through the LRU
-                        * faster than they are written so also forcibly stall.
-                        */
-                       if (sc->nr.immediate)
-                               congestion_wait(BLK_RW_ASYNC, HZ/10);
-               }
+       if (current_is_kswapd()) {
+               /*
+                * If reclaim is isolating dirty pages under writeback,
+                * it implies that the long-lived page allocation rate
+                * is exceeding the page laundering rate. Either the
+                * global limits are not being effective at throttling
+                * processes due to the page distribution throughout
+                * zones or there is heavy usage of a slow backing
+                * device. The only option is to throttle from reclaim
+                * context which is not ideal as there is no guarantee
+                * the dirtying process is throttled in the same way
+                * balance_dirty_pages() manages.
+                *
+                * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+                * count the number of pages under pages flagged for
+                * immediate reclaim and stall if any are encountered
+                * in the nr_immediate check below.
+                */
+               if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+                       set_bit(PGDAT_WRITEBACK, &pgdat->flags);
  
                 /*
-                * Legacy memcg will stall in page writeback so avoid forcibly
-                * stalling in wait_iff_congested().
+                * Tag a node as congested if all the dirty pages
+                * scanned were backed by a congested BDI and
+                * wait_iff_congested will stall.
                  */
-               if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
-                   sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-                       set_memcg_congestion(pgdat, root, true);
+               if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+                       set_bit(PGDAT_CONGESTED, &pgdat->flags);
+
+               /* Allow kswapd to start writing pages during reclaim.*/
+               if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+                       set_bit(PGDAT_DIRTY, &pgdat->flags);
  
                 /*
-                * Stall direct reclaim for IO completions if underlying BDIs
-                * and node is congested. Allow kswapd to continue until it
-                * starts encountering unqueued dirty pages or cycling through
-                * the LRU too quickly.
+                * If kswapd scans pages marked marked for immediate
+                * reclaim and under writeback (nr_immediate), it
+                * implies that pages are cycling through the LRU
+                * faster than they are written so also forcibly stall.
                  */
-               if (!sc->hibernation_mode && !current_is_kswapd() &&
-                  current_may_throttle() && pgdat_memcg_congested(pgdat, root))
-                       wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+               if (sc->nr.immediate)
+                       congestion_wait(BLK_RW_ASYNC, HZ/10);
+       }
+
+       /*
+        * Legacy memcg will stall in page writeback so avoid forcibly
+        * stalling in wait_iff_congested().
+        */
+       if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
+           sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+               set_memcg_congestion(pgdat, root, true);
+
+       /*
+        * Stall direct reclaim for IO completions if underlying BDIs
+        * and node is congested. Allow kswapd to continue until it
+        * starts encountering unqueued dirty pages or cycling through
+        * the LRU too quickly.
+        */
+       if (!sc->hibernation_mode && !current_is_kswapd() &&
+           current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+               wait_iff_congested(BLK_RW_ASYNC, HZ/10);
  
-       } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
-                                        sc));
+       if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
+                                   sc))
+               goto again;
  
         /*
          * Kswapd gives up on balancing particular nodes after too
author	Johannes Weiner <hannes@cmpxchg.org>
	Sun, 1 Dec 2019 01:55:43 +0000 (17:55 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 1 Dec 2019 20:59:07 +0000 (12:59 -0800)