c3f534a9e0c719f65ca2f3785fce873d5fa44997
[openwrt/staging/linusw.git] /
1 From f3c93d2e37a3c56593d7ccf4f4bcf1b58426fdd8 Mon Sep 17 00:00:00 2001
2 From: Yu Zhao <yuzhao@google.com>
3 Date: Wed, 21 Dec 2022 21:19:02 -0700
4 Subject: [PATCH 04/19] BACKPORT: mm: multi-gen LRU: remove aging fairness
5 safeguard
6
7 Recall that the aging produces the youngest generation: first it scans
8 for accessed folios and updates their gen counters; then it increments
9 lrugen->max_seq.
10
11 The current aging fairness safeguard for kswapd uses two passes to
12 ensure the fairness to multiple eligible memcgs. On the first pass,
13 which is shared with the eviction, it checks whether all eligible
14 memcgs are low on cold folios. If so, it requires a second pass, on
15 which it ages all those memcgs at the same time.
16
17 With memcg LRU, the aging, while ensuring eventual fairness, will run
18 when necessary. Therefore the current aging fairness safeguard for
19 kswapd will not be needed.
20
21 Note that memcg LRU only applies to global reclaim. For memcg reclaim,
22 the aging can be unfair to different memcgs, i.e., their
23 lrugen->max_seq can be incremented at different paces.
24
25 Link: https://lkml.kernel.org/r/20221222041905.2431096-5-yuzhao@google.com
26 Signed-off-by: Yu Zhao <yuzhao@google.com>
27 Cc: Johannes Weiner <hannes@cmpxchg.org>
28 Cc: Jonathan Corbet <corbet@lwn.net>
29 Cc: Michael Larabel <Michael@MichaelLarabel.com>
30 Cc: Michal Hocko <mhocko@kernel.org>
31 Cc: Mike Rapoport <rppt@kernel.org>
32 Cc: Roman Gushchin <roman.gushchin@linux.dev>
33 Cc: Suren Baghdasaryan <surenb@google.com>
34 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
35 Bug: 274865848
36 (cherry picked from commit 7348cc91821b0cb24dfb00e578047f68299a50ab)
37 [TJ: Resolved conflicts with older function signatures for
38 mem_cgroup_below_min / mem_cgroup_below_low]
39 Change-Id: I6e36ecfbaaefbc0a56d9a9d5d7cbe404ed7f57a5
40 Signed-off-by: T.J. Mercier <tjmercier@google.com>
41 ---
42 mm/vmscan.c | 126 ++++++++++++++++++++++++----------------------------
43 1 file changed, 59 insertions(+), 67 deletions(-)
44
45 diff --git a/mm/vmscan.c b/mm/vmscan.c
46 index 991961180b320..5a2e83e673232 100644
47 --- a/mm/vmscan.c
48 +++ b/mm/vmscan.c
49 @@ -136,7 +136,6 @@ struct scan_control {
50
51 #ifdef CONFIG_LRU_GEN
52 /* help kswapd make better choices among multiple memcgs */
53 - unsigned int memcgs_need_aging:1;
54 unsigned long last_reclaimed;
55 #endif
56
57 @@ -4455,7 +4454,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
58 return true;
59 }
60
61 -static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
62 +static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
63 struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
64 {
65 int gen, type, zone;
66 @@ -4464,6 +4463,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
67 unsigned long total = 0;
68 struct lru_gen_folio *lrugen = &lruvec->lrugen;
69 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
70 + DEFINE_MIN_SEQ(lruvec);
71 +
72 + /* whether this lruvec is completely out of cold folios */
73 + if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
74 + *nr_to_scan = 0;
75 + return true;
76 + }
77
78 for (type = !can_swap; type < ANON_AND_FILE; type++) {
79 unsigned long seq;
80 @@ -4492,8 +4498,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
81 * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
82 * ideal number of generations is MIN_NR_GENS+1.
83 */
84 - if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
85 - return true;
86 if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
87 return false;
88
89 @@ -4512,40 +4516,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
90 return false;
91 }
92
93 -static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
94 +static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
95 {
96 - bool need_aging;
97 - unsigned long nr_to_scan;
98 - int swappiness = get_swappiness(lruvec, sc);
99 + int gen, type, zone;
100 + unsigned long total = 0;
101 + bool can_swap = get_swappiness(lruvec, sc);
102 + struct lru_gen_folio *lrugen = &lruvec->lrugen;
103 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
104 DEFINE_MAX_SEQ(lruvec);
105 DEFINE_MIN_SEQ(lruvec);
106
107 - VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
108 + for (type = !can_swap; type < ANON_AND_FILE; type++) {
109 + unsigned long seq;
110
111 - mem_cgroup_calculate_protection(NULL, memcg);
112 + for (seq = min_seq[type]; seq <= max_seq; seq++) {
113 + gen = lru_gen_from_seq(seq);
114
115 - if (mem_cgroup_below_min(memcg))
116 - return false;
117 + for (zone = 0; zone < MAX_NR_ZONES; zone++)
118 + total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
119 + }
120 + }
121
122 - need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
123 + /* whether the size is big enough to be helpful */
124 + return mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
125 +}
126
127 - if (min_ttl) {
128 - int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
129 - unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
130 +static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
131 + unsigned long min_ttl)
132 +{
133 + int gen;
134 + unsigned long birth;
135 + struct mem_cgroup *memcg = lruvec_memcg(lruvec);
136 + DEFINE_MIN_SEQ(lruvec);
137
138 - if (time_is_after_jiffies(birth + min_ttl))
139 - return false;
140 + VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
141
142 - /* the size is likely too small to be helpful */
143 - if (!nr_to_scan && sc->priority != DEF_PRIORITY)
144 - return false;
145 - }
146 + /* see the comment on lru_gen_folio */
147 + gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
148 + birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
149
150 - if (need_aging)
151 - try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
152 + if (time_is_after_jiffies(birth + min_ttl))
153 + return false;
154
155 - return true;
156 + if (!lruvec_is_sizable(lruvec, sc))
157 + return false;
158 +
159 + mem_cgroup_calculate_protection(NULL, memcg);
160 +
161 + return !mem_cgroup_below_min(memcg);
162 }
163
164 /* to protect the working set of the last N jiffies */
165 @@ -4554,46 +4572,32 @@ static unsigned long lru_gen_min_ttl __read_mostly;
166 static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
167 {
168 struct mem_cgroup *memcg;
169 - bool success = false;
170 unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
171
172 VM_WARN_ON_ONCE(!current_is_kswapd());
173
174 sc->last_reclaimed = sc->nr_reclaimed;
175
176 - /*
177 - * To reduce the chance of going into the aging path, which can be
178 - * costly, optimistically skip it if the flag below was cleared in the
179 - * eviction path. This improves the overall performance when multiple
180 - * memcgs are available.
181 - */
182 - if (!sc->memcgs_need_aging) {
183 - sc->memcgs_need_aging = true;
184 + /* check the order to exclude compaction-induced reclaim */
185 + if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
186 return;
187 - }
188 -
189 - set_mm_walk(pgdat);
190
191 memcg = mem_cgroup_iter(NULL, NULL, NULL);
192 do {
193 struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
194
195 - if (age_lruvec(lruvec, sc, min_ttl))
196 - success = true;
197 + if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
198 + mem_cgroup_iter_break(NULL, memcg);
199 + return;
200 + }
201
202 cond_resched();
203 } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
204
205 - clear_mm_walk();
206 -
207 - /* check the order to exclude compaction-induced reclaim */
208 - if (success || !min_ttl || sc->order)
209 - return;
210 -
211 /*
212 * The main goal is to OOM kill if every generation from all memcgs is
213 * younger than min_ttl. However, another possibility is all memcgs are
214 - * either below min or empty.
215 + * either too small or below min.
216 */
217 if (mutex_trylock(&oom_lock)) {
218 struct oom_control oc = {
219 @@ -5101,33 +5105,27 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
220 * reclaim.
221 */
222 static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
223 - bool can_swap, bool *need_aging)
224 + bool can_swap)
225 {
226 unsigned long nr_to_scan;
227 struct mem_cgroup *memcg = lruvec_memcg(lruvec);
228 DEFINE_MAX_SEQ(lruvec);
229 - DEFINE_MIN_SEQ(lruvec);
230
231 if (mem_cgroup_below_min(memcg) ||
232 (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
233 return 0;
234
235 - *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
236 - if (!*need_aging)
237 + if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
238 return nr_to_scan;
239
240 /* skip the aging path at the default priority */
241 if (sc->priority == DEF_PRIORITY)
242 - goto done;
243 + return nr_to_scan;
244
245 - /* leave the work to lru_gen_age_node() */
246 - if (current_is_kswapd())
247 - return 0;
248 + try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
249
250 - if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
251 - return nr_to_scan;
252 -done:
253 - return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
254 + /* skip this lruvec as it's low on cold folios */
255 + return 0;
256 }
257
258 static unsigned long get_nr_to_reclaim(struct scan_control *sc)
259 @@ -5146,9 +5144,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
260 static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
261 {
262 struct blk_plug plug;
263 - bool need_aging = false;
264 unsigned long scanned = 0;
265 - unsigned long reclaimed = sc->nr_reclaimed;
266 unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
267
268 lru_add_drain();
269 @@ -5169,13 +5165,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
270 else
271 swappiness = 0;
272
273 - nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
274 + nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
275 if (!nr_to_scan)
276 - goto done;
277 + break;
278
279 delta = evict_folios(lruvec, sc, swappiness);
280 if (!delta)
281 - goto done;
282 + break;
283
284 scanned += delta;
285 if (scanned >= nr_to_scan)
286 @@ -5187,10 +5183,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
287 cond_resched();
288 }
289
290 - /* see the comment in lru_gen_age_node() */
291 - if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
292 - sc->memcgs_need_aging = false;
293 -done:
294 clear_mm_walk();
295
296 blk_finish_plug(&plug);
297 --
298 2.40.1
299