mm: vmscan: memcontrol: remove mem_cgroup_select_victim_node()

author Shakeel Butt <shakeelb@google.com>

Sun, 1 Dec 2019 01:50:16 +0000 (17:50 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sun, 1 Dec 2019 14:29:18 +0000 (06:29 -0800)
author Shakeel Butt <shakeelb@google.com>
Sun, 1 Dec 2019 01:50:16 +0000 (17:50 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 1 Dec 2019 14:29:18 +0000 (06:29 -0800)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index e82928deea88dfc5c4efdbc8869a64c37d6f448e..239e752a78177d7e1fc206cca2171cf83d80663f 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -80,7 +80,6 @@ struct mem_cgroup_id {
  enum mem_cgroup_events_target {
         MEM_CGROUP_TARGET_THRESH,
         MEM_CGROUP_TARGET_SOFTLIMIT,
-       MEM_CGROUP_TARGET_NUMAINFO,
         MEM_CGROUP_NTARGETS,
  };
  
@@ -312,13 +311,6 @@ struct mem_cgroup {
         struct list_head kmem_caches;
  #endif
  
-       int last_scanned_node;
-#if MAX_NUMNODES > 1
-       nodemask_t      scan_nodes;
-       atomic_t        numainfo_events;
-       atomic_t        numainfo_updating;
-#endif
-
  #ifdef CONFIG_CGROUP_WRITEBACK
         struct list_head cgwb_list;
         struct wb_domain cgwb_domain;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 94a5b6d831f9344b4fa532b09b50c4ac24a13f0f..529e12a59131974a850262cafdb7f293b5b41008 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -108,7 +108,6 @@ static const char *const mem_cgroup_lru_names[] = {
  
  #define THRESHOLDS_EVENTS_TARGET 128
  #define SOFTLIMIT_EVENTS_TARGET 1024
-#define NUMAINFO_EVENTS_TARGET 1024
  
  /*
   * Cgroups above their limits are maintained in a RB-Tree, independent of
@@ -877,9 +876,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
                 case MEM_CGROUP_TARGET_SOFTLIMIT:
                         next = val + SOFTLIMIT_EVENTS_TARGET;
                         break;
-               case MEM_CGROUP_TARGET_NUMAINFO:
-                       next = val + NUMAINFO_EVENTS_TARGET;
-                       break;
                 default:
                         break;
                 }
@@ -899,21 +895,12 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
         if (unlikely(mem_cgroup_event_ratelimit(memcg,
                                                 MEM_CGROUP_TARGET_THRESH))) {
                 bool do_softlimit;
-               bool do_numainfo __maybe_unused;
  
                 do_softlimit = mem_cgroup_event_ratelimit(memcg,
                                                 MEM_CGROUP_TARGET_SOFTLIMIT);
-#if MAX_NUMNODES > 1
-               do_numainfo = mem_cgroup_event_ratelimit(memcg,
-                                               MEM_CGROUP_TARGET_NUMAINFO);
-#endif
                 mem_cgroup_threshold(memcg);
                 if (unlikely(do_softlimit))
                         mem_cgroup_update_tree(memcg, page);
-#if MAX_NUMNODES > 1
-               if (unlikely(do_numainfo))
-                       atomic_inc(&memcg->numainfo_events);
-#endif
         }
  }
  
@@ -1591,104 +1578,6 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
         return ret;
  }
  
-#if MAX_NUMNODES > 1
-
-/**
- * test_mem_cgroup_node_reclaimable
- * @memcg: the target memcg
- * @nid: the node ID to be checked.
- * @noswap : specify true here if the user wants flle only information.
- *
- * This function returns whether the specified memcg contains any
- * reclaimable pages on a node. Returns true if there are any reclaimable
- * pages in the node.
- */
-static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
-               int nid, bool noswap)
-{
-       struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
-
-       if (lruvec_page_state(lruvec, NR_INACTIVE_FILE) ||
-           lruvec_page_state(lruvec, NR_ACTIVE_FILE))
-               return true;
-       if (noswap || !total_swap_pages)
-               return false;
-       if (lruvec_page_state(lruvec, NR_INACTIVE_ANON) ||
-           lruvec_page_state(lruvec, NR_ACTIVE_ANON))
-               return true;
-       return false;
-
-}
-
-/*
- * Always updating the nodemask is not very good - even if we have an empty
- * list or the wrong list here, we can start from some node and traverse all
- * nodes based on the zonelist. So update the list loosely once per 10 secs.
- *
- */
-static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
-{
-       int nid;
-       /*
-        * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
-        * pagein/pageout changes since the last update.
-        */
-       if (!atomic_read(&memcg->numainfo_events))
-               return;
-       if (atomic_inc_return(&memcg->numainfo_updating) > 1)
-               return;
-
-       /* make a nodemask where this memcg uses memory from */
-       memcg->scan_nodes = node_states[N_MEMORY];
-
-       for_each_node_mask(nid, node_states[N_MEMORY]) {
-
-               if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
-                       node_clear(nid, memcg->scan_nodes);
-       }
-
-       atomic_set(&memcg->numainfo_events, 0);
-       atomic_set(&memcg->numainfo_updating, 0);
-}
-
-/*
- * Selecting a node where we start reclaim from. Because what we need is just
- * reducing usage counter, start from anywhere is O,K. Considering
- * memory reclaim from current node, there are pros. and cons.
- *
- * Freeing memory from current node means freeing memory from a node which
- * we'll use or we've used. So, it may make LRU bad. And if several threads
- * hit limits, it will see a contention on a node. But freeing from remote
- * node means more costs for memory reclaim because of memory latency.
- *
- * Now, we use round-robin. Better algorithm is welcomed.
- */
-int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
-{
-       int node;
-
-       mem_cgroup_may_update_nodemask(memcg);
-       node = memcg->last_scanned_node;
-
-       node = next_node_in(node, memcg->scan_nodes);
-       /*
-        * mem_cgroup_may_update_nodemask might have seen no reclaimmable pages
-        * last time it really checked all the LRUs due to rate limiting.
-        * Fallback to the current node in that case for simplicity.
-        */
-       if (unlikely(node == MAX_NUMNODES))
-               node = numa_node_id();
-
-       memcg->last_scanned_node = node;
-       return node;
-}
-#else
-int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
-{
-       return 0;
-}
-#endif
-
  static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
                                    pg_data_t *pgdat,
                                    gfp_t gfp_mask,
@@ -5073,7 +4962,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
                 goto fail;
  
         INIT_WORK(&memcg->high_work, high_work_func);
-       memcg->last_scanned_node = MAX_NUMNODES;
         INIT_LIST_HEAD(&memcg->oom_notify);
         mutex_init(&memcg->thresholds_lock);
         spin_lock_init(&memcg->move_lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c

index ee4eecc7e1c2177041d00d8eb8d62ca64647206a..2beff0e0dc7b644224596d3dd1cd07f3b608cdc8 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3348,10 +3348,8 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                            gfp_t gfp_mask,
                                            bool may_swap)
  {
-       struct zonelist *zonelist;
         unsigned long nr_reclaimed;
         unsigned long pflags;
-       int nid;
         unsigned int noreclaim_flag;
         struct scan_control sc = {
                 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
@@ -3364,16 +3362,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                 .may_unmap = 1,
                 .may_swap = may_swap,
         };
-
-       set_task_reclaim_state(current, &sc.reclaim_state);
         /*
-        * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
-        * take care of from where we get pages. So the node where we start the
-        * scan does not need to be the current node.
+        * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
+        * equal pressure on all the nodes. This is based on the assumption that
+        * the reclaim does not bail out early.
          */
-       nid = mem_cgroup_select_victim_node(memcg);
+       struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
  
-       zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
+       set_task_reclaim_state(current, &sc.reclaim_state);
  
         trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);
author	Shakeel Butt <shakeelb@google.com>
	Sun, 1 Dec 2019 01:50:16 +0000 (17:50 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 1 Dec 2019 14:29:18 +0000 (06:29 -0800)
include/linux/memcontrol.h		patch | blob | history
mm/memcontrol.c		patch | blob | history
mm/vmscan.c		patch | blob | history