1 From e20b7386fccc18c791796eb1dc1a91eee3ccf801 Mon Sep 17 00:00:00 2001
2 From: Yu Zhao <yuzhao@google.com>
3 Date: Wed, 21 Dec 2022 21:19:02 -0700
4 Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard
6 Recall that the aging produces the youngest generation: first it scans
7 for accessed pages and updates their gen counters; then it increments lrugen->max_seq.
10 The current aging fairness safeguard for kswapd uses two passes to
11 ensure fairness among multiple eligible memcgs. On the first pass,
12 which is shared with the eviction, it checks whether all eligible
13 memcgs are low on cold pages. If so, it requires a second pass, on
14 which it ages all those memcgs at the same time.
16 With memcg LRU, the aging, while ensuring eventual fairness, will run
17 when necessary. Therefore the current aging fairness safeguard for
18 kswapd will not be needed.
20 Note that memcg LRU only applies to global reclaim. For memcg reclaim,
21 the aging can be unfair to different memcgs, i.e., their
22 lrugen->max_seq can be incremented at different paces.
24 Link: https://lkml.kernel.org/r/20221222041905.2431096-5-yuzhao@google.com
25 Signed-off-by: Yu Zhao <yuzhao@google.com>
26 Cc: Johannes Weiner <hannes@cmpxchg.org>
27 Cc: Jonathan Corbet <corbet@lwn.net>
28 Cc: Michael Larabel <Michael@MichaelLarabel.com>
29 Cc: Michal Hocko <mhocko@kernel.org>
30 Cc: Mike Rapoport <rppt@kernel.org>
31 Cc: Roman Gushchin <roman.gushchin@linux.dev>
32 Cc: Suren Baghdasaryan <surenb@google.com>
33 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
35 mm/vmscan.c | 126 ++++++++++++++++++++++++----------------------------
36 1 file changed, 59 insertions(+), 67 deletions(-)
38 diff --git a/mm/vmscan.c b/mm/vmscan.c
39 index 40e7a947c5c7..7159436872ba 100644
42 @@ -131,7 +131,6 @@ struct scan_control {
45 /* help kswapd make better choices among multiple memcgs */
46 - unsigned int memcgs_need_aging:1;
47 unsigned long last_reclaimed;
50 @@ -4184,7 +4183,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
54 -static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
55 +static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
56 struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
59 @@ -4193,6 +4192,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
60 unsigned long total = 0;
61 struct lru_gen_page *lrugen = &lruvec->lrugen;
62 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
63 + DEFINE_MIN_SEQ(lruvec);
65 + /* whether this lruvec is completely out of cold pages */
66 + if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
71 for (type = !can_swap; type < ANON_AND_FILE; type++) {
73 @@ -4221,8 +4227,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
74 * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
75 * ideal number of generations is MIN_NR_GENS+1.
77 - if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
79 if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
82 @@ -4241,40 +4245,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
86 -static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
87 +static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
90 - unsigned long nr_to_scan;
91 - int swappiness = get_swappiness(lruvec, sc);
92 + int gen, type, zone;
93 + unsigned long total = 0;
94 + bool can_swap = get_swappiness(lruvec, sc);
95 + struct lru_gen_page *lrugen = &lruvec->lrugen;
96 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
97 DEFINE_MAX_SEQ(lruvec);
98 DEFINE_MIN_SEQ(lruvec);
100 - VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
101 + for (type = !can_swap; type < ANON_AND_FILE; type++) {
104 - mem_cgroup_calculate_protection(NULL, memcg);
105 + for (seq = min_seq[type]; seq <= max_seq; seq++) {
106 + gen = lru_gen_from_seq(seq);
108 - if (mem_cgroup_below_min(memcg))
110 + for (zone = 0; zone < MAX_NR_ZONES; zone++)
111 + total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
115 - need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
116 + /* whether the size is big enough to be helpful */
117 + return mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
121 - int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
122 - unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
123 +static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
124 + unsigned long min_ttl)
127 + unsigned long birth;
128 + struct mem_cgroup *memcg = lruvec_memcg(lruvec);
129 + DEFINE_MIN_SEQ(lruvec);
131 - if (time_is_after_jiffies(birth + min_ttl))
133 + VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
135 - /* the size is likely too small to be helpful */
136 - if (!nr_to_scan && sc->priority != DEF_PRIORITY)
139 + /* see the comment on lru_gen_page */
140 + gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
141 + birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
144 - try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
145 + if (time_is_after_jiffies(birth + min_ttl))
149 + if (!lruvec_is_sizable(lruvec, sc))
152 + mem_cgroup_calculate_protection(NULL, memcg);
154 + return !mem_cgroup_below_min(memcg);
157 /* to protect the working set of the last N jiffies */
158 @@ -4283,46 +4301,32 @@ static unsigned long lru_gen_min_ttl __read_mostly;
159 static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
161 struct mem_cgroup *memcg;
162 - bool success = false;
163 unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
165 VM_WARN_ON_ONCE(!current_is_kswapd());
167 sc->last_reclaimed = sc->nr_reclaimed;
170 - * To reduce the chance of going into the aging path, which can be
171 - * costly, optimistically skip it if the flag below was cleared in the
172 - * eviction path. This improves the overall performance when multiple
173 - * memcgs are available.
175 - if (!sc->memcgs_need_aging) {
176 - sc->memcgs_need_aging = true;
177 + /* check the order to exclude compaction-induced reclaim */
178 + if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
182 - set_mm_walk(pgdat);
184 memcg = mem_cgroup_iter(NULL, NULL, NULL);
186 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
188 - if (age_lruvec(lruvec, sc, min_ttl))
190 + if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
191 + mem_cgroup_iter_break(NULL, memcg);
196 } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
200 - /* check the order to exclude compaction-induced reclaim */
201 - if (success || !min_ttl || sc->order)
205 * The main goal is to OOM kill if every generation from all memcgs is
206 * younger than min_ttl. However, another possibility is all memcgs are
207 - * either below min or empty.
208 + * either too small or below min.
210 if (mutex_trylock(&oom_lock)) {
211 struct oom_control oc = {
212 @@ -4830,33 +4834,27 @@ static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swapp
215 static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
216 - bool can_swap, bool *need_aging)
219 unsigned long nr_to_scan;
220 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
221 DEFINE_MAX_SEQ(lruvec);
222 - DEFINE_MIN_SEQ(lruvec);
224 if (mem_cgroup_below_min(memcg) ||
225 (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
228 - *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
230 + if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
233 /* skip the aging path at the default priority */
234 if (sc->priority == DEF_PRIORITY)
238 - /* leave the work to lru_gen_age_node() */
239 - if (current_is_kswapd())
241 + try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
243 - if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
246 - return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
247 + /* skip this lruvec as it's low on cold pages */
251 static unsigned long get_nr_to_reclaim(struct scan_control *sc)
252 @@ -4875,9 +4873,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
253 static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
255 struct blk_plug plug;
256 - bool need_aging = false;
257 unsigned long scanned = 0;
258 - unsigned long reclaimed = sc->nr_reclaimed;
259 unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
262 @@ -4898,13 +4894,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
266 - nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
267 + nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
272 delta = evict_pages(lruvec, sc, swappiness);
278 if (scanned >= nr_to_scan)
279 @@ -4916,10 +4912,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
283 - /* see the comment in lru_gen_age_node() */
284 - if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
285 - sc->memcgs_need_aging = false;
289 blk_finish_plug(&plug);