From b28b8ed1f45de1ecff0a893345a0fcbf3fff3bbe Mon Sep 17 00:00:00 2001 From: Kazuki Hashimoto Date: Wed, 24 May 2023 23:56:36 +0900 Subject: [PATCH] kernel: 6.1: Synchronize MGLRU patches with upstream Replace the refreshed 5.15 backports with backports for 6.1. This fixes FMODE_CAN_ODIRECT having the same value as FMODE_NOREUSE. Signed-off-by: Kazuki Hashimoto --- ...-gen-LRU-rename-lru_gen_struct-to-l.patch} | 59 ++- ...-gen-LRU-rename-lrugen-lists-to-lru.patch} | 80 +++- ...-gen-LRU-remove-eviction-fairness-s.patch} | 34 +- ...-gen-LRU-remove-aging-fairness-safe.patch} | 34 +- ...ti-gen-LRU-shuffle-should_run_aging.patch} | 20 +- ...-gen-LRU-per-node-lru_gen_folio-lis.patch} | 118 +++-- ...-gen-LRU-clarify-scan_control-flags.patch} | 47 +- ...-gen-LRU-simplify-arch_has_hw_pte_y.patch} | 17 +- ...-multi-gen-LRU-avoid-futile-retries.patch} | 21 +- ...-10-UPSTREAM-mm-add-vma_has_recency.patch} | 45 +- ...TREAM-mm-support-POSIX_FADV_NOREUSE.patch} | 29 +- ...i-gen-LRU-section-for-working-set-pr.patch | 74 +++ ...i-gen-LRU-section-for-rmap-PT-walk-f.patch | 64 +++ ...ti-gen-LRU-section-for-Bloom-filters.patch | 250 ++++++++++ ...-multi-gen-LRU-section-for-memcg-LRU.patch | 440 ++++++++++++++++++ ...i-gen-LRU-improve-lru_gen_exit_memcg.patch | 45 ++ ...multi-gen-LRU-improve-walk_pmd_range.patch | 140 ++++++ ...i-gen-LRU-simplify-lru_gen_look_arou.patch | 153 ++++++ 18 files changed, 1505 insertions(+), 165 deletions(-) rename target/linux/generic/backport-6.1/{020-v6.3-21-mm-multi-gen-LRU-rename-lru_gen_struct-to-lru_gen_pa.patch => 020-v6.3-01-UPSTREAM-mm-multi-gen-LRU-rename-lru_gen_struct-to-l.patch} (81%) rename target/linux/generic/backport-6.1/{020-v6.3-22-mm-multi-gen-LRU-rename-lrugen-lists-to-lrugen-pages.patch => 020-v6.3-02-UPSTREAM-mm-multi-gen-LRU-rename-lrugen-lists-to-lru.patch} (60%) rename target/linux/generic/backport-6.1/{020-v6.3-23-mm-multi-gen-LRU-remove-eviction-fairness-safeguard.patch => 020-v6.3-03-UPSTREAM-mm-multi-gen-LRU-remove-eviction-fairness-s.patch} (79%) rename target/linux/generic/backport-6.1/{020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch => 020-v6.3-04-BACKPORT-mm-multi-gen-LRU-remove-aging-fairness-safe.patch} (85%) rename target/linux/generic/backport-6.1/{020-v6.3-25-mm-multi-gen-LRU-shuffle-should_run_aging.patch => 020-v6.3-05-UPSTREAM-mm-multi-gen-LRU-shuffle-should_run_aging.patch} (87%) rename target/linux/generic/backport-6.1/{020-v6.3-26-mm-multi-gen-LRU-per-node-lru_gen_page-lists.patch => 020-v6.3-06-BACKPORT-mm-multi-gen-LRU-per-node-lru_gen_folio-lis.patch} (85%) rename target/linux/generic/backport-6.1/{020-v6.3-27-mm-multi-gen-LRU-clarify-scan_control-flags.patch => 020-v6.3-07-BACKPORT-mm-multi-gen-LRU-clarify-scan_control-flags.patch} (71%) rename target/linux/generic/backport-6.1/{020-v6.3-28-mm-multi-gen-LRU-simplify-arch_has_hw_pte_young-chec.patch => 020-v6.3-08-UPSTREAM-mm-multi-gen-LRU-simplify-arch_has_hw_pte_y.patch} (67%) rename target/linux/generic/backport-6.1/{020-v6.3-29-mm-multi-gen-LRU-avoid-futile-retries.patch => 020-v6.3-09-UPSTREAM-mm-multi-gen-LRU-avoid-futile-retries.patch} (74%) rename target/linux/generic/backport-6.1/{020-v6.3-19-mm-add-vma_has_recency.patch => 020-v6.3-10-UPSTREAM-mm-add-vma_has_recency.patch} (75%) rename target/linux/generic/backport-6.1/{020-v6.3-20-mm-support-POSIX_FADV_NOREUSE.patch => 020-v6.3-11-UPSTREAM-mm-support-POSIX_FADV_NOREUSE.patch} (78%) create mode 100644 target/linux/generic/backport-6.1/020-v6.3-12-UPSTREAM-mm-multi-gen-LRU-section-for-working-set-pr.patch 
create mode 100644 target/linux/generic/backport-6.1/020-v6.3-13-UPSTREAM-mm-multi-gen-LRU-section-for-rmap-PT-walk-f.patch create mode 100644 target/linux/generic/backport-6.1/020-v6.3-14-UPSTREAM-mm-multi-gen-LRU-section-for-Bloom-filters.patch create mode 100644 target/linux/generic/backport-6.1/020-v6.3-15-UPSTREAM-mm-multi-gen-LRU-section-for-memcg-LRU.patch create mode 100644 target/linux/generic/backport-6.1/020-v6.3-16-UPSTREAM-mm-multi-gen-LRU-improve-lru_gen_exit_memcg.patch create mode 100644 target/linux/generic/backport-6.1/020-v6.3-17-UPSTREAM-mm-multi-gen-LRU-improve-walk_pmd_range.patch create mode 100644 target/linux/generic/backport-6.1/020-v6.3-18-UPSTREAM-mm-multi-gen-LRU-simplify-lru_gen_look_arou.patch diff --git a/target/linux/generic/backport-6.1/020-v6.3-21-mm-multi-gen-LRU-rename-lru_gen_struct-to-lru_gen_pa.patch b/target/linux/generic/backport-6.1/020-v6.3-01-UPSTREAM-mm-multi-gen-LRU-rename-lru_gen_struct-to-l.patch similarity index 81% rename from target/linux/generic/backport-6.1/020-v6.3-21-mm-multi-gen-LRU-rename-lru_gen_struct-to-lru_gen_pa.patch rename to target/linux/generic/backport-6.1/020-v6.3-01-UPSTREAM-mm-multi-gen-LRU-rename-lru_gen_struct-to-l.patch index 881ef4e528..d63b03e6e8 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-21-mm-multi-gen-LRU-rename-lru_gen_struct-to-lru_gen_pa.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-01-UPSTREAM-mm-multi-gen-LRU-rename-lru_gen_struct-to-l.patch @@ -1,7 +1,7 @@ -From 348fdbada9fb3f0bf1a53651be46319105af187f Mon Sep 17 00:00:00 2001 +From 8c20e2eb5f2a0175b774134685e4d7bd93e85ff8 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:18:59 -0700 -Subject: [PATCH 21/29] mm: multi-gen LRU: rename lru_gen_struct to +Subject: [PATCH 01/19] UPSTREAM: mm: multi-gen LRU: rename lru_gen_struct to lru_gen_folio Patch series "mm: multi-gen LRU: memcg LRU", v3. @@ -115,6 +115,10 @@ Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry picked from commit 391655fe08d1f942359a11148aa9aaf3f99d6d6f) +Change-Id: I7df67e0e2435ba28f10eaa57d28d98b61a9210a6 +Signed-off-by: T.J. 
Mercier --- include/linux/mm_inline.h | 4 ++-- include/linux/mmzone.h | 6 +++--- @@ -122,9 +126,11 @@ Signed-off-by: Andrew Morton mm/workingset.c | 4 ++-- 4 files changed, 24 insertions(+), 24 deletions(-) +diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h +index e8ed225d8f7ca..f63968bd7de59 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h -@@ -178,7 +178,7 @@ static inline void lru_gen_update_size(s +@@ -178,7 +178,7 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli int zone = folio_zonenum(folio); int delta = folio_nr_pages(folio); enum lru_list lru = type * LRU_INACTIVE_FILE; @@ -133,7 +139,7 @@ Signed-off-by: Andrew Morton VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS); VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS); -@@ -224,7 +224,7 @@ static inline bool lru_gen_add_folio(str +@@ -224,7 +224,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); @@ -142,6 +148,8 @@ Signed-off-by: Andrew Morton VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 5f74891556f33..bd3e4689f72dc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -404,7 +404,7 @@ enum { @@ -171,9 +179,11 @@ Signed-off-by: Andrew Morton /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif +diff --git a/mm/vmscan.c b/mm/vmscan.c +index d18296109aa7e..27142caf284c1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -3190,7 +3190,7 @@ static int get_nr_gens(struct lruvec *lr +@@ -3190,7 +3190,7 @@ static int get_nr_gens(struct lruvec *lruvec, int type) static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) { @@ -191,7 +201,7 @@ Signed-off-by: Andrew Morton int hist = lru_hist_from_seq(lrugen->min_seq[type]); pos->refaulted = lrugen->avg_refaulted[type][tier] + -@@ -3611,7 +3611,7 @@ static void read_ctrl_pos(struct lruvec +@@ -3611,7 +3611,7 @@ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) { int hist, tier; @@ -200,7 +210,7 @@ Signed-off-by: Andrew Morton bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; unsigned long seq = carryover ? 
lrugen->min_seq[type] : lrugen->max_seq + 1; -@@ -3688,7 +3688,7 @@ static int folio_update_gen(struct folio +@@ -3688,7 +3688,7 @@ static int folio_update_gen(struct folio *folio, int gen) static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { int type = folio_is_file_lru(folio); @@ -209,7 +219,7 @@ Signed-off-by: Andrew Morton int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); unsigned long new_flags, old_flags = READ_ONCE(folio->flags); -@@ -3733,7 +3733,7 @@ static void update_batch_size(struct lru +@@ -3733,7 +3733,7 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk) { int gen, type, zone; @@ -218,7 +228,7 @@ Signed-off-by: Andrew Morton walk->batched = 0; -@@ -4253,7 +4253,7 @@ static bool inc_min_seq(struct lruvec *l +@@ -4250,7 +4250,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) { int zone; int remaining = MAX_LRU_BATCH; @@ -227,7 +237,7 @@ Signed-off-by: Andrew Morton int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); if (type == LRU_GEN_ANON && !can_swap) -@@ -4289,7 +4289,7 @@ static bool try_to_inc_min_seq(struct lr +@@ -4286,7 +4286,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) { int gen, type, zone; bool success = false; @@ -236,7 +246,7 @@ Signed-off-by: Andrew Morton DEFINE_MIN_SEQ(lruvec); VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); -@@ -4310,7 +4310,7 @@ next: +@@ -4307,7 +4307,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) ; } @@ -245,7 +255,7 @@ Signed-off-by: Andrew Morton if (can_swap) { min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); -@@ -4332,7 +4332,7 @@ static void inc_max_seq(struct lruvec *l +@@ -4329,7 +4329,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan) { int prev, next; int type, zone; @@ -254,7 +264,7 @@ Signed-off-by: Andrew Morton spin_lock_irq(&lruvec->lru_lock); -@@ -4390,7 +4390,7 @@ static bool try_to_inc_max_seq(struct lr +@@ -4387,7 +4387,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool success; struct lru_gen_mm_walk *walk; struct mm_struct *mm = NULL; @@ -263,7 +273,7 @@ Signed-off-by: Andrew Morton VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq)); -@@ -4455,7 +4455,7 @@ static bool should_run_aging(struct lruv +@@ -4452,7 +4452,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig unsigned long old = 0; unsigned long young = 0; unsigned long total = 0; @@ -272,7 +282,7 @@ Signed-off-by: Andrew Morton struct mem_cgroup *memcg = lruvec_memcg(lruvec); for (type = !can_swap; type < ANON_AND_FILE; type++) { -@@ -4740,7 +4740,7 @@ static bool sort_folio(struct lruvec *lr +@@ -4737,7 +4737,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) int delta = folio_nr_pages(folio); int refs = folio_lru_refs(folio); int tier = lru_tier_from_refs(refs); @@ -281,7 +291,7 @@ Signed-off-by: Andrew Morton VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio); -@@ -4840,7 +4840,7 @@ static int scan_folios(struct lruvec *lr +@@ -4837,7 +4837,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, int scanned = 0; int isolated = 0; int remaining = MAX_LRU_BATCH; @@ -290,7 +300,7 @@ Signed-off-by: Andrew Morton struct mem_cgroup *memcg = 
lruvec_memcg(lruvec); VM_WARN_ON_ONCE(!list_empty(list)); -@@ -5240,7 +5240,7 @@ done: +@@ -5237,7 +5237,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc static bool __maybe_unused state_is_valid(struct lruvec *lruvec) { @@ -299,7 +309,7 @@ Signed-off-by: Andrew Morton if (lrugen->enabled) { enum lru_list lru; -@@ -5522,7 +5522,7 @@ static void lru_gen_seq_show_full(struct +@@ -5519,7 +5519,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, int i; int type, tier; int hist = lru_hist_from_seq(seq); @@ -308,7 +318,7 @@ Signed-off-by: Andrew Morton for (tier = 0; tier < MAX_NR_TIERS; tier++) { seq_printf(m, " %10d", tier); -@@ -5572,7 +5572,7 @@ static int lru_gen_seq_show(struct seq_f +@@ -5569,7 +5569,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v) unsigned long seq; bool full = !debugfs_real_fops(m->file)->write; struct lruvec *lruvec = v; @@ -317,7 +327,7 @@ Signed-off-by: Andrew Morton int nid = lruvec_pgdat(lruvec)->node_id; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); -@@ -5826,7 +5826,7 @@ void lru_gen_init_lruvec(struct lruvec * +@@ -5823,7 +5823,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) { int i; int gen, type, zone; @@ -326,9 +336,11 @@ Signed-off-by: Andrew Morton lrugen->max_seq = MIN_NR_GENS + 1; lrugen->enabled = lru_gen_enabled(); +diff --git a/mm/workingset.c b/mm/workingset.c +index ae7e984b23c6b..688aaa73f64e8 100644 --- a/mm/workingset.c +++ b/mm/workingset.c -@@ -223,7 +223,7 @@ static void *lru_gen_eviction(struct fol +@@ -223,7 +223,7 @@ static void *lru_gen_eviction(struct folio *folio) unsigned long token; unsigned long min_seq; struct lruvec *lruvec; @@ -337,7 +349,7 @@ Signed-off-by: Andrew Morton int type = folio_is_file_lru(folio); int delta = folio_nr_pages(folio); int refs = folio_lru_refs(folio); -@@ -252,7 +252,7 @@ static void lru_gen_refault(struct folio +@@ -252,7 +252,7 @@ static void lru_gen_refault(struct folio *folio, void *shadow) unsigned long token; unsigned long min_seq; struct lruvec *lruvec; @@ -346,3 +358,6 @@ Signed-off-by: Andrew Morton struct mem_cgroup *memcg; struct pglist_data *pgdat; int type = folio_is_file_lru(folio); +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-22-mm-multi-gen-LRU-rename-lrugen-lists-to-lrugen-pages.patch b/target/linux/generic/backport-6.1/020-v6.3-02-UPSTREAM-mm-multi-gen-LRU-rename-lrugen-lists-to-lru.patch similarity index 60% rename from target/linux/generic/backport-6.1/020-v6.3-22-mm-multi-gen-LRU-rename-lrugen-lists-to-lrugen-pages.patch rename to target/linux/generic/backport-6.1/020-v6.3-02-UPSTREAM-mm-multi-gen-LRU-rename-lrugen-lists-to-lru.patch index 7940380c5c..65fb0144b1 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-22-mm-multi-gen-LRU-rename-lrugen-lists-to-lrugen-pages.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-02-UPSTREAM-mm-multi-gen-LRU-rename-lrugen-lists-to-lru.patch @@ -1,7 +1,7 @@ -From afd37e73db04c7e6b47411120ac5f6a7eca51fec Mon Sep 17 00:00:00 2001 +From 656287d55d9cfc72a4bcd4d9bd098570f12ce409 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:00 -0700 -Subject: [PATCH 22/29] mm: multi-gen LRU: rename lrugen->lists[] to +Subject: [PATCH 02/19] UPSTREAM: mm: multi-gen LRU: rename lrugen->lists[] to lrugen->folios[] lru_gen_folio will be chained into per-node lists by the coming @@ -17,15 +17,54 @@ Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry 
picked from commit 6df1b2212950aae2b2188c6645ea18e2a9e3fdd5) +Change-Id: I09f53e0fb2cd6b8b3adbb8a80b15dc5efbeae857 +Signed-off-by: T.J. Mercier --- - include/linux/mm_inline.h | 4 ++-- - include/linux/mmzone.h | 8 ++++---- - mm/vmscan.c | 20 ++++++++++---------- - 3 files changed, 16 insertions(+), 16 deletions(-) + Documentation/mm/multigen_lru.rst | 8 ++++---- + include/linux/mm_inline.h | 4 ++-- + include/linux/mmzone.h | 8 ++++---- + mm/vmscan.c | 20 ++++++++++---------- + 4 files changed, 20 insertions(+), 20 deletions(-) +diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst +index d7062c6a89464..d8f721f98868a 100644 +--- a/Documentation/mm/multigen_lru.rst ++++ b/Documentation/mm/multigen_lru.rst +@@ -89,15 +89,15 @@ variables are monotonically increasing. + + Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)`` + bits in order to fit into the gen counter in ``folio->flags``. Each +-truncated generation number is an index to ``lrugen->lists[]``. The ++truncated generation number is an index to ``lrugen->folios[]``. The + sliding window technique is used to track at least ``MIN_NR_GENS`` and + at most ``MAX_NR_GENS`` generations. The gen counter stores a value + within ``[1, MAX_NR_GENS]`` while a page is on one of +-``lrugen->lists[]``; otherwise it stores zero. ++``lrugen->folios[]``; otherwise it stores zero. + + Each generation is divided into multiple tiers. A page accessed ``N`` + times through file descriptors is in tier ``order_base_2(N)``. Unlike +-generations, tiers do not have dedicated ``lrugen->lists[]``. In ++generations, tiers do not have dedicated ``lrugen->folios[]``. In + contrast to moving across generations, which requires the LRU lock, + moving across tiers only involves atomic operations on + ``folio->flags`` and therefore has a negligible cost. A feedback loop +@@ -127,7 +127,7 @@ page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``. + Eviction + -------- + The eviction consumes old generations. Given an ``lruvec``, it +-increments ``min_seq`` when ``lrugen->lists[]`` indexed by ++increments ``min_seq`` when ``lrugen->folios[]`` indexed by + ``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to + evict from, it first compares ``min_seq[]`` to select the older type. 
+ If both types are equally old, it selects the one whose first tier has +diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h +index f63968bd7de59..da38e3d962e2f 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h -@@ -256,9 +256,9 @@ static inline bool lru_gen_add_folio(str +@@ -256,9 +256,9 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ if (reclaiming) @@ -37,6 +76,8 @@ Signed-off-by: Andrew Morton return true; } +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index bd3e4689f72dc..02e4323744715 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -312,7 +312,7 @@ enum lruvec_flags { @@ -68,9 +109,11 @@ Signed-off-by: Andrew Morton /* the multi-gen LRU sizes, eventually consistent */ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the exponential moving average of refaulted */ +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 27142caf284c1..b02fed912f742 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -4261,7 +4261,7 @@ static bool inc_min_seq(struct lruvec *l +@@ -4258,7 +4258,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) /* prevent cold/hot inversion if force_scan is true */ for (zone = 0; zone < MAX_NR_ZONES; zone++) { @@ -79,7 +122,7 @@ Signed-off-by: Andrew Morton while (!list_empty(head)) { struct folio *folio = lru_to_folio(head); -@@ -4272,7 +4272,7 @@ static bool inc_min_seq(struct lruvec *l +@@ -4269,7 +4269,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); new_gen = folio_inc_gen(lruvec, folio, false); @@ -88,7 +131,7 @@ Signed-off-by: Andrew Morton if (!--remaining) return false; -@@ -4300,7 +4300,7 @@ static bool try_to_inc_min_seq(struct lr +@@ -4297,7 +4297,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) gen = lru_gen_from_seq(min_seq[type]); for (zone = 0; zone < MAX_NR_ZONES; zone++) { @@ -97,7 +140,7 @@ Signed-off-by: Andrew Morton goto next; } -@@ -4765,7 +4765,7 @@ static bool sort_folio(struct lruvec *lr +@@ -4762,7 +4762,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) /* promoted */ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { @@ -106,7 +149,7 @@ Signed-off-by: Andrew Morton return true; } -@@ -4774,7 +4774,7 @@ static bool sort_folio(struct lruvec *lr +@@ -4771,7 +4771,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) int hist = lru_hist_from_seq(lrugen->min_seq[type]); gen = folio_inc_gen(lruvec, folio, false); @@ -115,7 +158,7 @@ Signed-off-by: Andrew Morton WRITE_ONCE(lrugen->protected[hist][type][tier - 1], lrugen->protected[hist][type][tier - 1] + delta); -@@ -4786,7 +4786,7 @@ static bool sort_folio(struct lruvec *lr +@@ -4783,7 +4783,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) if (folio_test_locked(folio) || folio_test_writeback(folio) || (type == LRU_GEN_FILE && folio_test_dirty(folio))) { gen = folio_inc_gen(lruvec, folio, true); @@ -124,7 +167,7 @@ Signed-off-by: Andrew Morton return true; } -@@ -4853,7 +4853,7 @@ static int scan_folios(struct lruvec *lr +@@ -4850,7 +4850,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, for (zone = sc->reclaim_idx; zone >= 0; zone--) { LIST_HEAD(moved); int skipped = 0; @@ -133,7 +176,7 @@ Signed-off-by: Andrew Morton while (!list_empty(head)) { 
struct folio *folio = lru_to_folio(head); -@@ -5253,7 +5253,7 @@ static bool __maybe_unused state_is_vali +@@ -5250,7 +5250,7 @@ static bool __maybe_unused state_is_valid(struct lruvec *lruvec) int gen, type, zone; for_each_gen_type_zone(gen, type, zone) { @@ -142,7 +185,7 @@ Signed-off-by: Andrew Morton return false; } } -@@ -5298,7 +5298,7 @@ static bool drain_evictable(struct lruve +@@ -5295,7 +5295,7 @@ static bool drain_evictable(struct lruvec *lruvec) int remaining = MAX_LRU_BATCH; for_each_gen_type_zone(gen, type, zone) { @@ -151,7 +194,7 @@ Signed-off-by: Andrew Morton while (!list_empty(head)) { bool success; -@@ -5835,7 +5835,7 @@ void lru_gen_init_lruvec(struct lruvec * +@@ -5832,7 +5832,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) lrugen->timestamps[i] = jiffies; for_each_gen_type_zone(gen, type, zone) @@ -160,3 +203,6 @@ Signed-off-by: Andrew Morton lruvec->mm_state.seq = MIN_NR_GENS; init_waitqueue_head(&lruvec->mm_state.wait); +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-23-mm-multi-gen-LRU-remove-eviction-fairness-safeguard.patch b/target/linux/generic/backport-6.1/020-v6.3-03-UPSTREAM-mm-multi-gen-LRU-remove-eviction-fairness-s.patch similarity index 79% rename from target/linux/generic/backport-6.1/020-v6.3-23-mm-multi-gen-LRU-remove-eviction-fairness-safeguard.patch rename to target/linux/generic/backport-6.1/020-v6.3-03-UPSTREAM-mm-multi-gen-LRU-remove-eviction-fairness-s.patch index d214898a6d..6d764bb2b4 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-23-mm-multi-gen-LRU-remove-eviction-fairness-safeguard.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-03-UPSTREAM-mm-multi-gen-LRU-remove-eviction-fairness-s.patch @@ -1,7 +1,8 @@ -From ce45f1c4b32cf69b166f56ef5bc6c761e06ed4e5 Mon Sep 17 00:00:00 2001 +From 14f9a7a15f3d1af351f30e0438fd747b7ac253b0 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:01 -0700 -Subject: [PATCH 23/29] mm: multi-gen LRU: remove eviction fairness safeguard +Subject: [PATCH 03/19] UPSTREAM: mm: multi-gen LRU: remove eviction fairness + safeguard Recall that the eviction consumes the oldest generation: first it bucket-sorts folios whose gen counters were updated by the aging and @@ -31,13 +32,19 @@ Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry picked from commit a579086c99ed70cc4bfc104348dbe3dd8f2787e6) +Change-Id: I08ac1b3c90e29cafd0566785aaa4bcdb5db7d22c +Signed-off-by: T.J. Mercier --- - mm/vmscan.c | 82 +++++++++++++++-------------------------------------- - 1 file changed, 23 insertions(+), 59 deletions(-) + mm/vmscan.c | 81 +++++++++++++++-------------------------------------- + 1 file changed, 23 insertions(+), 58 deletions(-) +diff --git a/mm/vmscan.c b/mm/vmscan.c +index b02fed912f742..991961180b320 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -448,6 +448,11 @@ static bool cgroup_reclaim(struct scan_c +@@ -448,6 +448,11 @@ static bool cgroup_reclaim(struct scan_control *sc) return sc->target_mem_cgroup; } @@ -49,7 +56,7 @@ Signed-off-by: Andrew Morton /** * writeback_throttling_sane - is the usual dirty throttling mechanism available? 
* @sc: scan_control in question -@@ -498,6 +503,11 @@ static bool cgroup_reclaim(struct scan_c +@@ -498,6 +503,11 @@ static bool cgroup_reclaim(struct scan_control *sc) return false; } @@ -61,7 +68,7 @@ Signed-off-by: Andrew Morton static bool writeback_throttling_sane(struct scan_control *sc) { return true; -@@ -4996,8 +5006,7 @@ static int isolate_folios(struct lruvec +@@ -4993,8 +5003,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw return scanned; } @@ -71,7 +78,7 @@ Signed-off-by: Andrew Morton { int type; int scanned; -@@ -5086,9 +5095,6 @@ retry: +@@ -5083,9 +5092,6 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap goto retry; } @@ -81,7 +88,7 @@ Signed-off-by: Andrew Morton return scanned; } -@@ -5127,67 +5133,26 @@ done: +@@ -5124,67 +5130,26 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control * return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0; } @@ -158,7 +165,7 @@ Signed-off-by: Andrew Morton lru_add_drain(); -@@ -5211,7 +5176,7 @@ static void lru_gen_shrink_lruvec(struct +@@ -5208,7 +5173,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc if (!nr_to_scan) goto done; @@ -167,7 +174,7 @@ Signed-off-by: Andrew Morton if (!delta) goto done; -@@ -5219,7 +5184,7 @@ static void lru_gen_shrink_lruvec(struct +@@ -5216,7 +5181,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc if (scanned >= nr_to_scan) break; @@ -176,7 +183,7 @@ Signed-off-by: Andrew Morton break; cond_resched(); -@@ -5669,7 +5634,7 @@ static int run_eviction(struct lruvec *l +@@ -5666,7 +5631,7 @@ static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_co if (sc->nr_reclaimed >= nr_to_reclaim) return 0; @@ -185,3 +192,6 @@ Signed-off-by: Andrew Morton return 0; cond_resched(); +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch b/target/linux/generic/backport-6.1/020-v6.3-04-BACKPORT-mm-multi-gen-LRU-remove-aging-fairness-safe.patch similarity index 85% rename from target/linux/generic/backport-6.1/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch rename to target/linux/generic/backport-6.1/020-v6.3-04-BACKPORT-mm-multi-gen-LRU-remove-aging-fairness-safe.patch index 7cbabd34e4..c3f534a9e0 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-04-BACKPORT-mm-multi-gen-LRU-remove-aging-fairness-safe.patch @@ -1,7 +1,8 @@ -From e20b7386fccc18c791796eb1dc1a91eee3ccf801 Mon Sep 17 00:00:00 2001 +From f3c93d2e37a3c56593d7ccf4f4bcf1b58426fdd8 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:02 -0700 -Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard +Subject: [PATCH 04/19] BACKPORT: mm: multi-gen LRU: remove aging fairness + safeguard Recall that the aging produces the youngest generation: first it scans for accessed folios and updates their gen counters; then it increments @@ -31,10 +32,18 @@ Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry picked from commit 7348cc91821b0cb24dfb00e578047f68299a50ab) +[TJ: Resolved conflicts with older function signatures for +min_cgroup_below_min / min_cgroup_below_low] +Change-Id: I6e36ecfbaaefbc0a56d9a9d5d7cbe404ed7f57a5 +Signed-off-by: T.J. 
Mercier --- mm/vmscan.c | 126 ++++++++++++++++++++++++---------------------------- 1 file changed, 59 insertions(+), 67 deletions(-) +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 991961180b320..5a2e83e673232 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -136,7 +136,6 @@ struct scan_control { @@ -45,7 +54,7 @@ Signed-off-by: Andrew Morton unsigned long last_reclaimed; #endif -@@ -4458,7 +4457,7 @@ done: +@@ -4455,7 +4454,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, return true; } @@ -54,7 +63,7 @@ Signed-off-by: Andrew Morton struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan) { int gen, type, zone; -@@ -4467,6 +4466,13 @@ static bool should_run_aging(struct lruv +@@ -4464,6 +4463,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig unsigned long total = 0; struct lru_gen_folio *lrugen = &lruvec->lrugen; struct mem_cgroup *memcg = lruvec_memcg(lruvec); @@ -68,7 +77,7 @@ Signed-off-by: Andrew Morton for (type = !can_swap; type < ANON_AND_FILE; type++) { unsigned long seq; -@@ -4495,8 +4501,6 @@ static bool should_run_aging(struct lruv +@@ -4492,8 +4498,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig * stalls when the number of generations reaches MIN_NR_GENS. Hence, the * ideal number of generations is MIN_NR_GENS+1. */ @@ -77,7 +86,7 @@ Signed-off-by: Andrew Morton if (min_seq[!can_swap] + MIN_NR_GENS < max_seq) return false; -@@ -4515,40 +4519,54 @@ static bool should_run_aging(struct lruv +@@ -4512,40 +4516,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig return false; } @@ -153,7 +162,7 @@ Signed-off-by: Andrew Morton } /* to protect the working set of the last N jiffies */ -@@ -4557,46 +4575,32 @@ static unsigned long lru_gen_min_ttl __r +@@ -4554,46 +4572,32 @@ static unsigned long lru_gen_min_ttl __read_mostly; static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { struct mem_cgroup *memcg; @@ -207,7 +216,7 @@ Signed-off-by: Andrew Morton */ if (mutex_trylock(&oom_lock)) { struct oom_control oc = { -@@ -5104,33 +5108,27 @@ retry: +@@ -5101,33 +5105,27 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap * reclaim. 
*/ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, @@ -247,7 +256,7 @@ Signed-off-by: Andrew Morton } static unsigned long get_nr_to_reclaim(struct scan_control *sc) -@@ -5149,9 +5147,7 @@ static unsigned long get_nr_to_reclaim(s +@@ -5146,9 +5144,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc) static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { struct blk_plug plug; @@ -257,7 +266,7 @@ Signed-off-by: Andrew Morton unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); lru_add_drain(); -@@ -5172,13 +5168,13 @@ static void lru_gen_shrink_lruvec(struct +@@ -5169,13 +5165,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc else swappiness = 0; @@ -274,7 +283,7 @@ Signed-off-by: Andrew Morton scanned += delta; if (scanned >= nr_to_scan) -@@ -5190,10 +5186,6 @@ static void lru_gen_shrink_lruvec(struct +@@ -5187,10 +5183,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc cond_resched(); } @@ -285,3 +294,6 @@ Signed-off-by: Andrew Morton clear_mm_walk(); blk_finish_plug(&plug); +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-25-mm-multi-gen-LRU-shuffle-should_run_aging.patch b/target/linux/generic/backport-6.1/020-v6.3-05-UPSTREAM-mm-multi-gen-LRU-shuffle-should_run_aging.patch similarity index 87% rename from target/linux/generic/backport-6.1/020-v6.3-25-mm-multi-gen-LRU-shuffle-should_run_aging.patch rename to target/linux/generic/backport-6.1/020-v6.3-05-UPSTREAM-mm-multi-gen-LRU-shuffle-should_run_aging.patch index bedbbd3e1b..d6bfe2a91c 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-25-mm-multi-gen-LRU-shuffle-should_run_aging.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-05-UPSTREAM-mm-multi-gen-LRU-shuffle-should_run_aging.patch @@ -1,12 +1,11 @@ -From 107d54931df3c28d81648122e219bf0034ef4e99 Mon Sep 17 00:00:00 2001 +From eca3858631e0cbad2ca6e40f788892749428e4cb Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:03 -0700 -Subject: [PATCH 25/29] mm: multi-gen LRU: shuffle should_run_aging() +Subject: [PATCH 05/19] UPSTREAM: mm: multi-gen LRU: shuffle should_run_aging() Move should_run_aging() next to its only caller left. Link: https://lkml.kernel.org/r/20221222041905.2431096-6-yuzhao@google.com -Signed-off-by: Yu Zhao Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michael Larabel @@ -15,13 +14,21 @@ Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry picked from commit 77d4459a4a1a472b7309e475f962dda87d950abd) +Signed-off-by: T.J. Mercier +Change-Id: I3b0383fe16b93a783b4d8c0b3a0b325160392576 +Signed-off-by: Yu Zhao +Signed-off-by: T.J. Mercier --- mm/vmscan.c | 124 ++++++++++++++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 62 deletions(-) +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 5a2e83e673232..0c47952714b26 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -4457,68 +4457,6 @@ done: +@@ -4454,68 +4454,6 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, return true; } @@ -90,7 +97,7 @@ Signed-off-by: Andrew Morton static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) { int gen, type, zone; -@@ -5102,6 +5040,68 @@ retry: +@@ -5099,6 +5037,68 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap return scanned; } @@ -159,3 +166,6 @@ Signed-off-by: Andrew Morton /* * For future optimizations: * 1. 
Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-26-mm-multi-gen-LRU-per-node-lru_gen_page-lists.patch b/target/linux/generic/backport-6.1/020-v6.3-06-BACKPORT-mm-multi-gen-LRU-per-node-lru_gen_folio-lis.patch similarity index 85% rename from target/linux/generic/backport-6.1/020-v6.3-26-mm-multi-gen-LRU-per-node-lru_gen_page-lists.patch rename to target/linux/generic/backport-6.1/020-v6.3-06-BACKPORT-mm-multi-gen-LRU-per-node-lru_gen_folio-lis.patch index 45789ff209..14f28820ec 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-26-mm-multi-gen-LRU-per-node-lru_gen_page-lists.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-06-BACKPORT-mm-multi-gen-LRU-per-node-lru_gen_folio-lis.patch @@ -1,7 +1,8 @@ -From fa6363828d314e837c5f79e97ea5e8c0d2f7f062 Mon Sep 17 00:00:00 2001 +From 8ee8571e47aa75221e5fbd4c9c7802fc4244c346 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:04 -0700 -Subject: [PATCH 26/29] mm: multi-gen LRU: per-node lru_gen_folio lists +Subject: [PATCH 06/19] BACKPORT: mm: multi-gen LRU: per-node lru_gen_folio + lists For each node, memcgs are divided into two generations: the old and the young. For each generation, memcgs are randomly sharded into @@ -58,18 +59,26 @@ Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry picked from commit e4dde56cd208674ce899b47589f263499e5b8cdc) +[TJ: Resolved conflicts with older function signatures for +min_cgroup_below_min / min_cgroup_below_low and includes] +Change-Id: Idc8a0f635e035d72dd911f807d1224cb47cbd655 +Signed-off-by: T.J. Mercier --- include/linux/memcontrol.h | 10 + include/linux/mm_inline.h | 17 ++ include/linux/mmzone.h | 117 +++++++++++- mm/memcontrol.c | 16 ++ - mm/folio_alloc.c | 1 + - mm/vmscan.c | 373 +++++++++++++++++++++++++++++++++---- - 6 files changed, 499 insertions(+), 35 deletions(-) + mm/page_alloc.c | 1 + + mm/vmscan.c | 374 +++++++++++++++++++++++++++++++++---- + 6 files changed, 500 insertions(+), 35 deletions(-) +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h +index e039763029563..82d28b052a9e5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h -@@ -790,6 +790,11 @@ static inline void obj_cgroup_put(struct +@@ -790,6 +790,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg) percpu_ref_put(&objcg->refcnt); } @@ -81,7 +90,7 @@ Signed-off-by: Andrew Morton static inline void mem_cgroup_put(struct mem_cgroup *memcg) { if (memcg) -@@ -1290,6 +1295,11 @@ static inline void obj_cgroup_put(struct +@@ -1290,6 +1295,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg) { } @@ -93,9 +102,11 @@ Signed-off-by: Andrew Morton static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } +diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h +index da38e3d962e2f..c1fd3922dc5dd 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h -@@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void +@@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void) return current->in_lru_fault; } @@ -114,7 +125,7 @@ Signed-off-by: Andrew Morton static inline int lru_gen_from_seq(unsigned long seq) { return seq % MAX_NR_GENS; -@@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void +@@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void) return false; } @@ -126,6 +137,8 @@ Signed-off-by: Andrew Morton static inline bool lru_gen_add_folio(struct 
lruvec *lruvec, struct folio *folio, bool reclaiming) { return false; +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 02e4323744715..66e067a635682 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -7,6 +7,7 @@ @@ -167,7 +180,7 @@ Signed-off-by: Andrew Morton }; enum { -@@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec * +@@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec *lruvec); void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); #ifdef CONFIG_MEMCG @@ -256,7 +269,7 @@ Signed-off-by: Andrew Morton static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } -@@ -494,6 +587,7 @@ static inline void lru_gen_look_around(s +@@ -494,6 +587,7 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) } #ifdef CONFIG_MEMCG @@ -264,7 +277,7 @@ Signed-off-by: Andrew Morton static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } -@@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(st +@@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } @@ -299,9 +312,11 @@ Signed-off-by: Andrew Morton #endif CACHELINE_PADDING(_pad2_); +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index 3e8f1ad0fe9db..7815d556e38cc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -477,6 +477,16 @@ static void mem_cgroup_update_tree(struc +@@ -477,6 +477,16 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) struct mem_cgroup_per_node *mz; struct mem_cgroup_tree_per_node *mctz; @@ -318,7 +333,7 @@ Signed-off-by: Andrew Morton mctz = soft_limit_tree.rb_tree_per_node[nid]; if (!mctz) return; -@@ -3522,6 +3532,9 @@ unsigned long mem_cgroup_soft_limit_recl +@@ -3522,6 +3532,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, struct mem_cgroup_tree_per_node *mctz; unsigned long excess; @@ -328,7 +343,7 @@ Signed-off-by: Andrew Morton if (order > 0) return 0; -@@ -5382,6 +5395,7 @@ static int mem_cgroup_css_online(struct +@@ -5382,6 +5395,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) if (unlikely(mem_cgroup_is_root(memcg))) queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ); @@ -336,7 +351,7 @@ Signed-off-by: Andrew Morton return 0; offline_kmem: memcg_offline_kmem(memcg); -@@ -5413,6 +5427,7 @@ static void mem_cgroup_css_offline(struc +@@ -5413,6 +5427,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) memcg_offline_kmem(memcg); reparent_shrinker_deferred(memcg); wb_memcg_offline(memcg); @@ -344,7 +359,7 @@ Signed-off-by: Andrew Morton drain_all_stock(memcg); -@@ -5424,6 +5439,7 @@ static void mem_cgroup_css_released(stru +@@ -5424,6 +5439,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css) struct mem_cgroup *memcg = mem_cgroup_from_css(css); invalidate_reclaim_iterators(memcg); @@ -352,9 +367,11 @@ Signed-off-by: Andrew Morton } static void mem_cgroup_css_free(struct cgroup_subsys_state *css) +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 69668817fed37..473057b81a9df 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -7957,6 +7957,7 @@ static void __init free_area_init_node(i +@@ -7957,6 +7957,7 @@ static void __init free_area_init_node(int nid) pgdat_set_deferred_range(pgdat); free_area_init_core(pgdat); @@ -362,6 +379,8 @@ Signed-off-by: Andrew Morton } static void __init free_area_init_memoryless_node(int nid) +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 
0c47952714b26..65eb28448f216 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -54,6 +54,8 @@ @@ -385,7 +404,7 @@ Signed-off-by: Andrew Morton /* Allocation order */ s8 order; -@@ -3160,6 +3157,9 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_ca +@@ -3160,6 +3157,9 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS); for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) @@ -395,7 +414,7 @@ Signed-off-by: Andrew Morton static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) { struct pglist_data *pgdat = NODE_DATA(nid); -@@ -4443,8 +4443,7 @@ done: +@@ -4440,8 +4440,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, if (sc->priority <= DEF_PRIORITY - 2) wait_event_killable(lruvec->mm_state.wait, max_seq < READ_ONCE(lrugen->max_seq)); @@ -405,7 +424,7 @@ Signed-off-by: Andrew Morton } VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); -@@ -4517,8 +4516,6 @@ static void lru_gen_age_node(struct pgli +@@ -4514,8 +4513,6 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) VM_WARN_ON_ONCE(!current_is_kswapd()); @@ -414,7 +433,7 @@ Signed-off-by: Andrew Morton /* check the order to exclude compaction-induced reclaim */ if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY) return; -@@ -5107,8 +5104,7 @@ static bool should_run_aging(struct lruv +@@ -5104,8 +5101,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg * reclaim. */ @@ -424,7 +443,7 @@ Signed-off-by: Andrew Morton { unsigned long nr_to_scan; struct mem_cgroup *memcg = lruvec_memcg(lruvec); -@@ -5125,10 +5121,8 @@ static unsigned long get_nr_to_scan(stru +@@ -5122,10 +5118,8 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control * if (sc->priority == DEF_PRIORITY) return nr_to_scan; @@ -436,7 +455,7 @@ Signed-off-by: Andrew Morton } static unsigned long get_nr_to_reclaim(struct scan_control *sc) -@@ -5137,29 +5131,18 @@ static unsigned long get_nr_to_reclaim(s +@@ -5134,29 +5128,18 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc) if (!global_reclaim(sc)) return -1; @@ -468,7 +487,7 @@ Signed-off-by: Andrew Morton if (sc->may_swap) swappiness = get_swappiness(lruvec, sc); -@@ -5169,7 +5152,7 @@ static void lru_gen_shrink_lruvec(struct +@@ -5166,7 +5149,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc swappiness = 0; nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); @@ -477,7 +496,7 @@ Signed-off-by: Andrew Morton break; delta = evict_folios(lruvec, sc, swappiness); -@@ -5186,10 +5169,250 @@ static void lru_gen_shrink_lruvec(struct +@@ -5183,11 +5166,252 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc cond_resched(); } @@ -515,8 +534,9 @@ Signed-off-by: Andrew Morton + + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority); + -+ vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned, -+ sc->nr_reclaimed - reclaimed); ++ if (!sc->proactive) ++ vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned, ++ sc->nr_reclaimed - reclaimed); + + sc->nr_reclaimed += current->reclaim_state->reclaimed_slab; + current->reclaim_state->reclaimed_slab = 0; @@ -538,7 +558,7 @@ Signed-off-by: Andrew Morton + struct mem_cgroup *memcg = NULL; + unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); + -+ bin = first_bin = prandom_u32_max(MEMCG_NR_BINS); ++ bin = first_bin = 
get_random_u32_below(MEMCG_NR_BINS); +restart: + gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); + @@ -601,11 +621,11 @@ Signed-off-by: Andrew Morton + if (try_to_shrink_lruvec(lruvec, sc)) + lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG); + -+ clear_mm_walk(); -+ -+ blk_finish_plug(&plug); -+} -+ + clear_mm_walk(); + + blk_finish_plug(&plug); + } + +#else /* !CONFIG_MEMCG */ + +static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) @@ -671,9 +691,9 @@ Signed-off-by: Andrew Morton + if (current_is_kswapd()) + sc->nr_reclaimed += reclaimed; + - clear_mm_walk(); - - blk_finish_plug(&plug); ++ clear_mm_walk(); ++ ++ blk_finish_plug(&plug); + + /* kswapd should never fail */ + pgdat->kswapd_failures = 0; @@ -684,7 +704,7 @@ Signed-off-by: Andrew Morton +{ + int seg; + int old, new; -+ int bin = prandom_u32_max(MEMCG_NR_BINS); ++ int bin = get_random_u32_below(MEMCG_NR_BINS); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + spin_lock(&pgdat->memcg_lru.lock); @@ -723,12 +743,13 @@ Signed-off-by: Andrew Morton + WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); + + spin_unlock(&pgdat->memcg_lru.lock); - } ++} +#endif - ++ /****************************************************************************** * state change -@@ -5647,11 +5870,11 @@ static int run_cmd(char cmd, int memcg_i + ******************************************************************************/ +@@ -5644,11 +5868,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, if (!mem_cgroup_disabled()) { rcu_read_lock(); @@ -743,7 +764,7 @@ Signed-off-by: Andrew Morton rcu_read_unlock(); if (!memcg) -@@ -5799,6 +6022,19 @@ void lru_gen_init_lruvec(struct lruvec * +@@ -5796,6 +6020,19 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) } #ifdef CONFIG_MEMCG @@ -763,7 +784,7 @@ Signed-off-by: Andrew Morton void lru_gen_init_memcg(struct mem_cgroup *memcg) { INIT_LIST_HEAD(&memcg->mm_list.fifo); -@@ -5822,7 +6058,69 @@ void lru_gen_exit_memcg(struct mem_cgrou +@@ -5819,7 +6056,69 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) } } } @@ -773,7 +794,7 @@ Signed-off-by: Andrew Morton +{ + int gen; + int nid; -+ int bin = prandom_u32_max(MEMCG_NR_BINS); ++ int bin = get_random_u32_below(MEMCG_NR_BINS); + + for_each_node(nid) { + struct pglist_data *pgdat = NODE_DATA(nid); @@ -834,7 +855,7 @@ Signed-off-by: Andrew Morton static int __init init_lru_gen(void) { -@@ -5849,6 +6147,10 @@ static void lru_gen_shrink_lruvec(struct +@@ -5846,6 +6145,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc { } @@ -845,7 +866,7 @@ Signed-off-by: Andrew Morton #endif /* CONFIG_LRU_GEN */ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -@@ -5862,7 +6164,7 @@ static void shrink_lruvec(struct lruvec +@@ -5859,7 +6162,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) bool proportional_reclaim; struct blk_plug plug; @@ -854,7 +875,7 @@ Signed-off-by: Andrew Morton lru_gen_shrink_lruvec(lruvec, sc); return; } -@@ -6105,6 +6407,11 @@ static void shrink_node(pg_data_t *pgdat +@@ -6102,6 +6405,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) struct lruvec *target_lruvec; bool reclaimable = false; @@ -866,3 +887,6 @@ Signed-off-by: Andrew Morton target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); again: +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-27-mm-multi-gen-LRU-clarify-scan_control-flags.patch 
b/target/linux/generic/backport-6.1/020-v6.3-07-BACKPORT-mm-multi-gen-LRU-clarify-scan_control-flags.patch similarity index 71% rename from target/linux/generic/backport-6.1/020-v6.3-27-mm-multi-gen-LRU-clarify-scan_control-flags.patch rename to target/linux/generic/backport-6.1/020-v6.3-07-BACKPORT-mm-multi-gen-LRU-clarify-scan_control-flags.patch index 89f7ed30af..9aaf247a09 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-27-mm-multi-gen-LRU-clarify-scan_control-flags.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-07-BACKPORT-mm-multi-gen-LRU-clarify-scan_control-flags.patch @@ -1,7 +1,7 @@ -From 93147736b5b3a21bea24313bfc7a696829932009 Mon Sep 17 00:00:00 2001 +From 11b14ee8cbbbebd8204609076a9327a1171cd253 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:05 -0700 -Subject: [PATCH 27/29] mm: multi-gen LRU: clarify scan_control flags +Subject: [PATCH 07/19] BACKPORT: mm: multi-gen LRU: clarify scan_control flags Among the flags in scan_control: 1. sc->may_swap, which indicates swap constraint due to memsw.max, is @@ -12,7 +12,7 @@ Among the flags in scan_control: 3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers swappiness to prioritize file LRU, since clean file folios are more likely to exist. -4. sc->may_writefolio and sc->may_unmap, which indicates opportunistic +4. sc->may_writepage and sc->may_unmap, which indicates opportunistic reclaim, are rejected, since unmapped clean folios are already prioritized. Scanning for more of them is likely futile and can cause high reclaim latency when there is a large number of memcgs. @@ -29,13 +29,21 @@ Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry picked from commit e9d4e1ee788097484606c32122f146d802a9c5fb) +[TJ: Resolved conflict with older function signature for min_cgroup_below_min, and over +cdded861182142ac4488a4d64c571107aeb77f53 ("ANDROID: MGLRU: Don't skip anon reclaim if swap low")] +Change-Id: Ic2e779eaf4e91a3921831b4e2fa10c740dc59d50 +Signed-off-by: T.J. 
Mercier --- mm/vmscan.c | 55 +++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 65eb28448f216..0a0e1250ffc87 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -3185,6 +3185,9 @@ static int get_swappiness(struct lruvec +@@ -3185,6 +3185,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -45,7 +53,7 @@ Signed-off-by: Andrew Morton if (!can_demote(pgdat->node_id, sc) && mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) return 0; -@@ -4226,7 +4229,7 @@ static void walk_mm(struct lruvec *lruve +@@ -4223,7 +4226,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_ } while (err == -EAGAIN); } @@ -54,7 +62,7 @@ Signed-off-by: Andrew Morton { struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; -@@ -4234,7 +4237,7 @@ static struct lru_gen_mm_walk *set_mm_wa +@@ -4231,7 +4234,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) VM_WARN_ON_ONCE(walk); walk = &pgdat->mm_walk; @@ -63,7 +71,7 @@ Signed-off-by: Andrew Morton VM_WARN_ON_ONCE(current_is_kswapd()); walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); -@@ -4420,7 +4423,7 @@ static bool try_to_inc_max_seq(struct lr +@@ -4417,7 +4420,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, goto done; } @@ -72,7 +80,7 @@ Signed-off-by: Andrew Morton if (!walk) { success = iterate_mm_list_nowalk(lruvec, max_seq); goto done; -@@ -4489,8 +4492,6 @@ static bool lruvec_is_reclaimable(struct +@@ -4486,8 +4489,6 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MIN_SEQ(lruvec); @@ -81,7 +89,7 @@ Signed-off-by: Andrew Morton /* see the comment on lru_gen_folio */ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); -@@ -4746,12 +4747,8 @@ static bool isolate_folio(struct lruvec +@@ -4743,12 +4744,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca { bool success; @@ -95,19 +103,19 @@ Signed-off-by: Andrew Morton (folio_test_dirty(folio) || (folio_test_anon(folio) && !folio_test_swapcache(folio)))) return false; -@@ -4848,9 +4845,8 @@ static int scan_folios(struct lruvec *lr +@@ -4845,9 +4842,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, __count_vm_events(PGSCAN_ANON + type, isolated); /* - * There might not be eligible pages due to reclaim_idx, may_unmap and - * may_writepage. Check the remaining to prevent livelock if it's not - * making progress. -+ * There might not be eligible pages due to reclaim_idx. Check the ++ * There might not be eligible folios due to reclaim_idx. Check the + * remaining to prevent livelock if it's not making progress. */ return isolated || !remaining ? 
scanned : 0; }
-@@ -5110,8 +5106,7 @@ static long get_nr_to_scan(struct lruvec
+@@ -5107,8 +5103,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	DEFINE_MAX_SEQ(lruvec);
 
-	if (mem_cgroup_below_min(memcg) ||
-	    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
+	if (mem_cgroup_below_min(memcg))
 		return 0;
 
 	if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
-@@ -5139,17 +5134,14 @@ static bool try_to_shrink_lruvec(struct
+@@ -5136,17 +5131,14 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	long nr_to_scan;
 	unsigned long scanned = 0;
 	unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
-	int swappiness;
+	int swappiness = get_swappiness(lruvec, sc);
 
-	if (sc->may_swap)
-		swappiness = get_swappiness(lruvec, sc);
-	else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
-		swappiness = 1;
-	else
-		swappiness = 0;
+	/* clean file folios are more likely to exist */
+	if (swappiness && !(sc->gfp_mask & __GFP_IO))
+		swappiness = 1;
 
 	nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
 	if (nr_to_scan <= 0)
-@@ -5279,12 +5271,13 @@ static void lru_gen_shrink_lruvec(struct
+@@ -5277,12 +5269,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
 	struct blk_plug plug;
 
 	VM_WARN_ON_ONCE(global_reclaim(sc));
+	VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);
 
 	lru_add_drain();
 
 	blk_start_plug(&plug);
 
-	set_mm_walk(lruvec_pgdat(lruvec));
+	set_mm_walk(NULL, sc->proactive);
 
 	if (try_to_shrink_lruvec(lruvec, sc))
 		lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
-@@ -5340,11 +5333,19 @@ static void lru_gen_shrink_node(struct p
+@@ -5338,11 +5331,19 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
 
 	VM_WARN_ON_ONCE(!global_reclaim(sc));
 
+	/*
+	 * Unmapped clean folios are already prioritized. Scanning for more of
+	 * them is likely futile and can cause high reclaim latency when there
+	 * is a large number of memcgs.
+	 */
+	if (!sc->may_writepage || !sc->may_unmap)
+		goto done;
+
 	lru_add_drain();
 
 	blk_start_plug(&plug);
 
-	set_mm_walk(pgdat);
+	set_mm_walk(pgdat, sc->proactive);
 
 	set_initial_priority(pgdat, sc);
-@@ -5362,7 +5363,7 @@ static void lru_gen_shrink_node(struct p
+@@ -5360,7 +5361,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
 	clear_mm_walk();
 
 	blk_finish_plug(&plug);
-
+done:
 	/* kswapd should never fail */
 	pgdat->kswapd_failures = 0;
 }
-@@ -5934,7 +5935,7 @@ static ssize_t lru_gen_seq_write(struct
+@@ -5932,7 +5933,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
 	set_task_reclaim_state(current, &sc.reclaim_state);
 	flags = memalloc_noreclaim_save();
 	blk_start_plug(&plug);
-	if (!set_mm_walk(NULL)) {
+	if (!set_mm_walk(NULL, true)) {
 		err = -ENOMEM;
 		goto done;
 	}
-- 
2.40.1

diff --git a/target/linux/generic/backport-6.1/020-v6.3-28-mm-multi-gen-LRU-simplify-arch_has_hw_pte_young-chec.patch b/target/linux/generic/backport-6.1/020-v6.3-08-UPSTREAM-mm-multi-gen-LRU-simplify-arch_has_hw_pte_y.patch
similarity index 67%
rename from target/linux/generic/backport-6.1/020-v6.3-28-mm-multi-gen-LRU-simplify-arch_has_hw_pte_young-chec.patch
rename to target/linux/generic/backport-6.1/020-v6.3-08-UPSTREAM-mm-multi-gen-LRU-simplify-arch_has_hw_pte_y.patch
index 367f93363b..adf85b616f 100644
--- a/target/linux/generic/backport-6.1/020-v6.3-28-mm-multi-gen-LRU-simplify-arch_has_hw_pte_young-chec.patch
+++ b/target/linux/generic/backport-6.1/020-v6.3-08-UPSTREAM-mm-multi-gen-LRU-simplify-arch_has_hw_pte_y.patch
@@ -1,8 +1,8 @@
-From cf3297e4c7a928da8b2b2f0baff2f9c69ea57952 Mon Sep 17 00:00:00 2001
+From 25887d48dff860751a06caa4188bfaf6bfb6e4b2 Mon Sep 17 00:00:00 2001
 From: Yu Zhao 
 Date: Wed, 21 Dec 2022 21:19:06 -0700
-Subject: [PATCH 28/29] mm: multi-gen LRU: simplify arch_has_hw_pte_young()
- check
+Subject: [PATCH 08/19] UPSTREAM: mm: multi-gen LRU: simplify
+ arch_has_hw_pte_young() check
 
 Scanning page tables when hardware does not set the accessed bit has
no real use cases. 
@@ -17,13 +17,19 @@ Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry picked from commit f386e9314025ea99dae639ed2032560a92081430) +Change-Id: I84d97ab665b4e3bb862a9bc7d72f50dea7191a6b +Signed-off-by: T.J. Mercier --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 0a0e1250ffc87..aa9746f2bc80b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -4418,7 +4418,7 @@ static bool try_to_inc_max_seq(struct lr +@@ -4415,7 +4415,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, * handful of PTEs. Spreading the work out over a period of time usually * is less efficient, but it avoids bursty page faults. */ @@ -32,3 +38,6 @@ Signed-off-by: Andrew Morton success = iterate_mm_list_nowalk(lruvec, max_seq); goto done; } +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-29-mm-multi-gen-LRU-avoid-futile-retries.patch b/target/linux/generic/backport-6.1/020-v6.3-09-UPSTREAM-mm-multi-gen-LRU-avoid-futile-retries.patch similarity index 74% rename from target/linux/generic/backport-6.1/020-v6.3-29-mm-multi-gen-LRU-avoid-futile-retries.patch rename to target/linux/generic/backport-6.1/020-v6.3-09-UPSTREAM-mm-multi-gen-LRU-avoid-futile-retries.patch index b9eb5f694e..2cf14d7816 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-29-mm-multi-gen-LRU-avoid-futile-retries.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-09-UPSTREAM-mm-multi-gen-LRU-avoid-futile-retries.patch @@ -1,7 +1,7 @@ -From cc67f962cc53f6e1dfa92eb85b7b26fe83a3c66f Mon Sep 17 00:00:00 2001 +From 620b0ee94455e48d124414cd06d8a53f69fb6453 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 13 Feb 2023 00:53:22 -0700 -Subject: [PATCH 29/29] mm: multi-gen LRU: avoid futile retries +Subject: [PATCH 09/19] UPSTREAM: mm: multi-gen LRU: avoid futile retries Recall that the per-node memcg LRU has two generations and they alternate when the last memcg (of a given node) is moved from one to the other. @@ -19,13 +19,19 @@ Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") Signed-off-by: Yu Zhao Reported-by: T.J. Mercier Signed-off-by: Andrew Morton +Bug: 274865848 +(cherry picked from commit 9f550d78b40da21b4da515db4c37d8d7b12aa1a6) +Change-Id: Ie92535676b005ec9e7987632b742fdde8d54436f +Signed-off-by: T.J. 
Mercier --- mm/vmscan.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) +diff --git a/mm/vmscan.c b/mm/vmscan.c +index aa9746f2bc80b..49da02f841c81 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -5208,18 +5208,20 @@ static int shrink_one(struct lruvec *lru +@@ -5206,18 +5206,20 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) { @@ -41,14 +47,14 @@ Signed-off-by: Andrew Morton - struct mem_cgroup *memcg = NULL; unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); - bin = first_bin = prandom_u32_max(MEMCG_NR_BINS); + bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); restart: + op = 0; + memcg = NULL; gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); rcu_read_lock(); -@@ -5243,14 +5245,22 @@ restart: +@@ -5241,14 +5243,22 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) op = shrink_one(lruvec, sc); @@ -74,7 +80,7 @@ Signed-off-by: Andrew Morton /* restart if raced with lru_gen_rotate_memcg() */ if (gen != get_nulls_value(pos)) goto restart; -@@ -5259,11 +5269,6 @@ restart: +@@ -5257,11 +5267,6 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) bin = get_memcg_bin(bin + 1); if (bin != first_bin) goto restart; @@ -86,3 +92,6 @@ Signed-off-by: Andrew Morton } static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-19-mm-add-vma_has_recency.patch b/target/linux/generic/backport-6.1/020-v6.3-10-UPSTREAM-mm-add-vma_has_recency.patch similarity index 75% rename from target/linux/generic/backport-6.1/020-v6.3-19-mm-add-vma_has_recency.patch rename to target/linux/generic/backport-6.1/020-v6.3-10-UPSTREAM-mm-add-vma_has_recency.patch index 5335b80488..0a0b439e25 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-19-mm-add-vma_has_recency.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-10-UPSTREAM-mm-add-vma_has_recency.patch @@ -1,7 +1,7 @@ -From 6c7f552a48b49a8612786a28a2239fbc24fac289 Mon Sep 17 00:00:00 2001 +From 70d216c71ff5c5b17dd1da6294f97b91fb6aba7a Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Dec 2022 14:52:51 -0700 -Subject: [PATCH 19/29] mm: add vma_has_recency() +Subject: [PATCH 10/19] UPSTREAM: mm: add vma_has_recency() Add vma_has_recency() to indicate whether a VMA may exhibit temporal locality that the LRU algorithm relies on. @@ -43,22 +43,28 @@ results are available in that thread. [1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/ Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com +Change-Id: I291dcb795197659e40e46539cd32b857677c34ad Signed-off-by: Yu Zhao Cc: Alexander Viro Cc: Andrea Righi Cc: Johannes Weiner Cc: Michael Larabel Signed-off-by: Andrew Morton +(cherry picked from commit 8788f6781486769d9598dcaedc3fe0eb12fc3e59) +Bug: 274865848 +Signed-off-by: T.J. 
Mercier --- - include/linux/mm_inline.h | 9 +++++++++ - mm/memory.c | 8 ++++---- + include/linux/mm_inline.h | 8 ++++++++ + mm/memory.c | 7 +++---- mm/rmap.c | 42 +++++++++++++++++---------------------- mm/vmscan.c | 5 ++++- - 4 files changed, 35 insertions(+), 29 deletions(-) + 4 files changed, 33 insertions(+), 29 deletions(-) +diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h +index c1fd3922dc5dd..7bb2e5f94734c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h -@@ -578,4 +578,12 @@ pte_install_uffd_wp_if_needed(struct vm_ +@@ -595,4 +595,12 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, #endif } @@ -71,9 +77,11 @@ Signed-off-by: Andrew Morton +} + #endif +diff --git a/mm/memory.c b/mm/memory.c +index 747b7ea30f890..c2f48f8003c2e 100644 --- a/mm/memory.c +++ b/mm/memory.c -@@ -1435,8 +1435,7 @@ again: +@@ -1435,8 +1435,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, force_flush = 1; set_page_dirty(page); } @@ -83,7 +91,7 @@ Signed-off-by: Andrew Morton mark_page_accessed(page); } rss[mm_counter(page)]--; -@@ -5170,8 +5169,8 @@ static inline void mm_account_fault(stru +@@ -5170,8 +5169,8 @@ static inline void mm_account_fault(struct pt_regs *regs, #ifdef CONFIG_LRU_GEN static void lru_gen_enter_fault(struct vm_area_struct *vma) { @@ -94,9 +102,11 @@ Signed-off-by: Andrew Morton } static void lru_gen_exit_fault(void) +diff --git a/mm/rmap.c b/mm/rmap.c +index 7da2d8d097d9b..825dac3caa1e5 100644 --- a/mm/rmap.c +++ b/mm/rmap.c -@@ -823,25 +823,14 @@ static bool folio_referenced_one(struct +@@ -823,25 +823,14 @@ static bool folio_referenced_one(struct folio *folio, } if (pvmw.pte) { @@ -125,16 +135,16 @@ Signed-off-by: Andrew Morton } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { if (pmdp_clear_flush_young_notify(vma, address, pvmw.pmd)) -@@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma +@@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg) struct folio_referenced_arg *pra = arg; struct mem_cgroup *memcg = pra->memcg; - if (!mm_match_cgroup(vma->vm_mm, memcg)) + /* + * Ignore references from this mapping if it has no recency. If the -+ * page has been used in another mapping, we will catch it; if this ++ * folio has been used in another mapping, we will catch it; if this + * other mapping is already gone, the unmap path will have set the -+ * referenced flag or activated the page in zap_pte_range(). ++ * referenced flag or activated the folio in zap_pte_range(). 
+ */ + if (!vma_has_recency(vma)) + return true; @@ -147,7 +157,7 @@ Signed-off-by: Andrew Morton return true; return false; -@@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio +@@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio, int is_locked, .arg = (void *)&pra, .anon_lock = folio_lock_anon_vma_read, .try_lock = true, @@ -155,7 +165,7 @@ Signed-off-by: Andrew Morton }; *vm_flags = 0; -@@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio +@@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio, int is_locked, return 1; } @@ -171,9 +181,11 @@ Signed-off-by: Andrew Morton rmap_walk(folio, &rwc); *vm_flags = pra.vm_flags; +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 49da02f841c81..596fed6ae0439 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c -@@ -3766,7 +3766,10 @@ static int should_skip_vma(unsigned long +@@ -3778,7 +3778,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal if (is_vm_hugetlb_page(vma)) return true; @@ -185,3 +197,6 @@ Signed-off-by: Andrew Morton return true; if (vma == get_gate_vma(vma->vm_mm)) +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-20-mm-support-POSIX_FADV_NOREUSE.patch b/target/linux/generic/backport-6.1/020-v6.3-11-UPSTREAM-mm-support-POSIX_FADV_NOREUSE.patch similarity index 78% rename from target/linux/generic/backport-6.1/020-v6.3-20-mm-support-POSIX_FADV_NOREUSE.patch rename to target/linux/generic/backport-6.1/020-v6.3-11-UPSTREAM-mm-support-POSIX_FADV_NOREUSE.patch index b2b7c605ca..4c6fcecdac 100644 --- a/target/linux/generic/backport-6.1/020-v6.3-20-mm-support-POSIX_FADV_NOREUSE.patch +++ b/target/linux/generic/backport-6.1/020-v6.3-11-UPSTREAM-mm-support-POSIX_FADV_NOREUSE.patch @@ -1,7 +1,7 @@ -From 686c3d4f71de9e0e7a27f03a5617a712385f90cd Mon Sep 17 00:00:00 2001 +From 9ca4e437a24dfc4ec6c362f319eb9850b9eca497 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Dec 2022 14:52:52 -0700 -Subject: [PATCH 20/29] mm: support POSIX_FADV_NOREUSE +Subject: [PATCH 11/19] UPSTREAM: mm: support POSIX_FADV_NOREUSE This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU algorithm can ignore access to mapped files marked by this flag. @@ -22,7 +22,7 @@ Its limitations are: by two users and one of them having set POSIX_FADV_NOREUSE on the file, this page will be activated upon the second user accessing it. This corner case can be covered by checking POSIX_FADV_NOREUSE - before calling mark_page_accessed() on the read path. But it is + before calling folio_mark_accessed() on the read path. But it is considered not worth the effort. There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1]. @@ -67,32 +67,40 @@ which makes it on par with the active/inactive LRU. [2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57 Link: https://lkml.kernel.org/r/20221230215252.2628425-2-yuzhao@google.com +Change-Id: I0b7f5f971d78014ea1ba44cee6a8ec902a4330d0 Signed-off-by: Yu Zhao Cc: Alexander Viro Cc: Andrea Righi Cc: Johannes Weiner Cc: Michael Larabel Signed-off-by: Andrew Morton +(cherry picked from commit 17e810229cb3068b692fa078bd9b3a6527e0866a) +Bug: 274865848 +Signed-off-by: T.J. 
Mercier --- include/linux/fs.h | 2 ++ include/linux/mm_inline.h | 3 +++ mm/fadvise.c | 5 ++++- 3 files changed, 9 insertions(+), 1 deletion(-) +diff --git a/include/linux/fs.h b/include/linux/fs.h +index f14ecbeab2a9d..97f9c41c1a43a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h -@@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb +@@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports DIRECT IO */ #define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) -+#define FMODE_NOREUSE ((__force fmode_t)0x400000) ++#define FMODE_NOREUSE ((__force fmode_t)0x800000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) +diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h +index 7bb2e5f94734c..9a8e2049333c0 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h -@@ -583,6 +583,9 @@ static inline bool vma_has_recency(struc +@@ -600,6 +600,9 @@ static inline bool vma_has_recency(struct vm_area_struct *vma) if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) return false; @@ -102,9 +110,11 @@ Signed-off-by: Andrew Morton return true; } +diff --git a/mm/fadvise.c b/mm/fadvise.c +index c76ee665355a4..2ba24d865bf5f 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c -@@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, l +@@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice) case POSIX_FADV_NORMAL: file->f_ra.ra_pages = bdi->ra_pages; spin_lock(&file->f_lock); @@ -113,7 +123,7 @@ Signed-off-by: Andrew Morton spin_unlock(&file->f_lock); break; case POSIX_FADV_RANDOM: -@@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, l +@@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice) force_page_cache_readahead(mapping, file, start_index, nrpages); break; case POSIX_FADV_NOREUSE: @@ -123,3 +133,6 @@ Signed-off-by: Andrew Morton break; case POSIX_FADV_DONTNEED: __filemap_fdatawrite_range(mapping, offset, endbyte, +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-12-UPSTREAM-mm-multi-gen-LRU-section-for-working-set-pr.patch b/target/linux/generic/backport-6.1/020-v6.3-12-UPSTREAM-mm-multi-gen-LRU-section-for-working-set-pr.patch new file mode 100644 index 0000000000..08804e0023 --- /dev/null +++ b/target/linux/generic/backport-6.1/020-v6.3-12-UPSTREAM-mm-multi-gen-LRU-section-for-working-set-pr.patch @@ -0,0 +1,74 @@ +From 1b5e4c317d80f4826eceb3781702d18d06b14394 Mon Sep 17 00:00:00 2001 +From: "T.J. Alumbaugh" +Date: Wed, 18 Jan 2023 00:18:21 +0000 +Subject: [PATCH 12/19] UPSTREAM: mm: multi-gen LRU: section for working set + protection + +Patch series "mm: multi-gen LRU: improve". + +This patch series improves a few MGLRU functions, collects related +functions, and adds additional documentation. + +This patch (of 7): + +Add a section for working set protection in the code and the design doc. +The admin doc already contains its usage. + +Link: https://lkml.kernel.org/r/20230118001827.1040870-1-talumbau@google.com +Link: https://lkml.kernel.org/r/20230118001827.1040870-2-talumbau@google.com +Change-Id: I65599075fd42951db7739a2ab7cee78516e157b3 +Signed-off-by: T.J. Alumbaugh +Cc: Yu Zhao +Signed-off-by: Andrew Morton +(cherry picked from commit 7b8144e63d84716f16a1b929e0c7e03ae5c4d5c1) +Bug: 274865848 +Signed-off-by: T.J. 
Mercier +--- + Documentation/mm/multigen_lru.rst | 15 +++++++++++++++ + mm/vmscan.c | 4 ++++ + 2 files changed, 19 insertions(+) + +diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst +index d8f721f98868a..6e1483e70fdca 100644 +--- a/Documentation/mm/multigen_lru.rst ++++ b/Documentation/mm/multigen_lru.rst +@@ -141,6 +141,21 @@ loop has detected outlying refaults from the tier this page is in. To + this end, the feedback loop uses the first tier as the baseline, for + the reason stated earlier. + ++Working set protection ++---------------------- ++Each generation is timestamped at birth. If ``lru_gen_min_ttl`` is ++set, an ``lruvec`` is protected from the eviction when its oldest ++generation was born within ``lru_gen_min_ttl`` milliseconds. In other ++words, it prevents the working set of ``lru_gen_min_ttl`` milliseconds ++from getting evicted. The OOM killer is triggered if this working set ++cannot be kept in memory. ++ ++This time-based approach has the following advantages: ++ ++1. It is easier to configure because it is agnostic to applications ++ and memory sizes. ++2. It is more reliable because it is directly wired to the OOM killer. ++ + Summary + ------- + The multi-gen LRU can be disassembled into the following parts: +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 596fed6ae0439..ab0b8d3b9d88f 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4459,6 +4459,10 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, + return true; + } + ++/****************************************************************************** ++ * working set protection ++ ******************************************************************************/ ++ + static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) + { + int gen, type, zone; +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-13-UPSTREAM-mm-multi-gen-LRU-section-for-rmap-PT-walk-f.patch b/target/linux/generic/backport-6.1/020-v6.3-13-UPSTREAM-mm-multi-gen-LRU-section-for-rmap-PT-walk-f.patch new file mode 100644 index 0000000000..f80dedb405 --- /dev/null +++ b/target/linux/generic/backport-6.1/020-v6.3-13-UPSTREAM-mm-multi-gen-LRU-section-for-rmap-PT-walk-f.patch @@ -0,0 +1,64 @@ +From 5ddf9d53d375e42af49b744bd7c2f8247c6bce15 Mon Sep 17 00:00:00 2001 +From: "T.J. Alumbaugh" +Date: Wed, 18 Jan 2023 00:18:22 +0000 +Subject: [PATCH 13/19] UPSTREAM: mm: multi-gen LRU: section for rmap/PT walk + feedback + +Add a section for lru_gen_look_around() in the code and the design doc. + +Link: https://lkml.kernel.org/r/20230118001827.1040870-3-talumbau@google.com +Change-Id: I5097af63f61b3b69ec2abee6cdbdc33c296df213 +Signed-off-by: T.J. Alumbaugh +Cc: Yu Zhao +Signed-off-by: Andrew Morton +(cherry picked from commit db19a43d9b3a8876552f00f656008206ef9a5efa) +Bug: 274865848 +Signed-off-by: T.J. Mercier +--- + Documentation/mm/multigen_lru.rst | 14 ++++++++++++++ + mm/vmscan.c | 4 ++++ + 2 files changed, 18 insertions(+) + +diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst +index 6e1483e70fdca..bd988a142bc2f 100644 +--- a/Documentation/mm/multigen_lru.rst ++++ b/Documentation/mm/multigen_lru.rst +@@ -156,6 +156,20 @@ This time-based approach has the following advantages: + and memory sizes. + 2. It is more reliable because it is directly wired to the OOM killer. 
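The protection described in the doc text above reduces to a single time comparison against the birth of the oldest generation. A hypothetical sketch; the helper name and the oldest_birth parameter are illustrative, not the kernel implementation:

	/* protected while the oldest generation is younger than min_ttl */
	static bool lruvec_is_protected(unsigned long oldest_birth,
					unsigned long min_ttl_ms)
	{
		return min_ttl_ms &&
		       time_before(jiffies,
				   oldest_birth + msecs_to_jiffies(min_ttl_ms));
	}

If every eligible lruvec passes this test and reclaim still cannot make progress, the OOM killer is the documented fallback.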
+ ++Rmap/PT walk feedback ++--------------------- ++Searching the rmap for PTEs mapping each page on an LRU list (to test ++and clear the accessed bit) can be expensive because pages from ++different VMAs (PA space) are not cache friendly to the rmap (VA ++space). For workloads mostly using mapped pages, searching the rmap ++can incur the highest CPU cost in the reclaim path. ++ ++``lru_gen_look_around()`` exploits spatial locality to reduce the ++trips into the rmap. It scans the adjacent PTEs of a young PTE and ++promotes hot pages. If the scan was done cacheline efficiently, it ++adds the PMD entry pointing to the PTE table to the Bloom filter. This ++forms a feedback loop between the eviction and the aging. ++ + Summary + ------- + The multi-gen LRU can be disassembled into the following parts: +diff --git a/mm/vmscan.c b/mm/vmscan.c +index ab0b8d3b9d88f..8fa82630240d6 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4553,6 +4553,10 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) + } + } + ++/****************************************************************************** ++ * rmap/PT walk feedback ++ ******************************************************************************/ ++ + /* + * This function exploits spatial locality when shrink_folio_list() walks the + * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-14-UPSTREAM-mm-multi-gen-LRU-section-for-Bloom-filters.patch b/target/linux/generic/backport-6.1/020-v6.3-14-UPSTREAM-mm-multi-gen-LRU-section-for-Bloom-filters.patch new file mode 100644 index 0000000000..f98588c4d8 --- /dev/null +++ b/target/linux/generic/backport-6.1/020-v6.3-14-UPSTREAM-mm-multi-gen-LRU-section-for-Bloom-filters.patch @@ -0,0 +1,250 @@ +From 397624e12244ec038f51cb1f178ccb7a2ec562e5 Mon Sep 17 00:00:00 2001 +From: "T.J. Alumbaugh" +Date: Wed, 18 Jan 2023 00:18:23 +0000 +Subject: [PATCH 14/19] UPSTREAM: mm: multi-gen LRU: section for Bloom filters + +Move Bloom filters code into a dedicated section. Improve the design doc +to explain Bloom filter usage and connection between aging and eviction in +their use. + +Link: https://lkml.kernel.org/r/20230118001827.1040870-4-talumbau@google.com +Change-Id: I73e866f687c1ed9f5c8538086aa39408b79897db +Signed-off-by: T.J. Alumbaugh +Cc: Yu Zhao +Signed-off-by: Andrew Morton +(cherry picked from commit ccbbbb85945d8f0255aa9dbc1b617017e2294f2c) +Bug: 274865848 +Signed-off-by: T.J. Mercier +--- + Documentation/mm/multigen_lru.rst | 16 +++ + mm/vmscan.c | 180 +++++++++++++++--------------- + 2 files changed, 108 insertions(+), 88 deletions(-) + +diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst +index bd988a142bc2f..770b5d539856c 100644 +--- a/Documentation/mm/multigen_lru.rst ++++ b/Documentation/mm/multigen_lru.rst +@@ -170,6 +170,22 @@ promotes hot pages. If the scan was done cacheline efficiently, it + adds the PMD entry pointing to the PTE table to the Bloom filter. This + forms a feedback loop between the eviction and the aging. + ++Bloom Filters ++------------- ++Bloom filters are a space and memory efficient data structure for set ++membership test, i.e., test if an element is not in the set or may be ++in the set. ++ ++In the eviction path, specifically, in ``lru_gen_look_around()``, if a ++PMD has a sufficient number of hot pages, its address is placed in the ++filter. 
In the aging path, set membership means that the PTE range ++will be scanned for young pages. ++ ++Note that Bloom filters are probabilistic on set membership. If a test ++is false positive, the cost is an additional scan of a range of PTEs, ++which may yield hot pages anyway. Parameters of the filter itself can ++control the false positive rate in the limit. ++ + Summary + ------- + The multi-gen LRU can be disassembled into the following parts: +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 8fa82630240d6..74b4f9d660b56 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -3208,6 +3208,98 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) + get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS; + } + ++/****************************************************************************** ++ * Bloom filters ++ ******************************************************************************/ ++ ++/* ++ * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when ++ * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of ++ * bits in a bitmap, k is the number of hash functions and n is the number of ++ * inserted items. ++ * ++ * Page table walkers use one of the two filters to reduce their search space. ++ * To get rid of non-leaf entries that no longer have enough leaf entries, the ++ * aging uses the double-buffering technique to flip to the other filter each ++ * time it produces a new generation. For non-leaf entries that have enough ++ * leaf entries, the aging carries them over to the next generation in ++ * walk_pmd_range(); the eviction also report them when walking the rmap ++ * in lru_gen_look_around(). ++ * ++ * For future optimizations: ++ * 1. It's not necessary to keep both filters all the time. The spare one can be ++ * freed after the RCU grace period and reallocated if needed again. ++ * 2. And when reallocating, it's worth scaling its size according to the number ++ * of inserted entries in the other filter, to reduce the memory overhead on ++ * small systems and false positives on large systems. ++ * 3. Jenkins' hash function is an alternative to Knuth's. 
++ */ ++#define BLOOM_FILTER_SHIFT 15 ++ ++static inline int filter_gen_from_seq(unsigned long seq) ++{ ++ return seq % NR_BLOOM_FILTERS; ++} ++ ++static void get_item_key(void *item, int *key) ++{ ++ u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2); ++ ++ BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32)); ++ ++ key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1); ++ key[1] = hash >> BLOOM_FILTER_SHIFT; ++} ++ ++static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) ++{ ++ int key[2]; ++ unsigned long *filter; ++ int gen = filter_gen_from_seq(seq); ++ ++ filter = READ_ONCE(lruvec->mm_state.filters[gen]); ++ if (!filter) ++ return true; ++ ++ get_item_key(item, key); ++ ++ return test_bit(key[0], filter) && test_bit(key[1], filter); ++} ++ ++static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) ++{ ++ int key[2]; ++ unsigned long *filter; ++ int gen = filter_gen_from_seq(seq); ++ ++ filter = READ_ONCE(lruvec->mm_state.filters[gen]); ++ if (!filter) ++ return; ++ ++ get_item_key(item, key); ++ ++ if (!test_bit(key[0], filter)) ++ set_bit(key[0], filter); ++ if (!test_bit(key[1], filter)) ++ set_bit(key[1], filter); ++} ++ ++static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq) ++{ ++ unsigned long *filter; ++ int gen = filter_gen_from_seq(seq); ++ ++ filter = lruvec->mm_state.filters[gen]; ++ if (filter) { ++ bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT)); ++ return; ++ } ++ ++ filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), ++ __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); ++ WRITE_ONCE(lruvec->mm_state.filters[gen], filter); ++} ++ + /****************************************************************************** + * mm_struct list + ******************************************************************************/ +@@ -3333,94 +3425,6 @@ void lru_gen_migrate_mm(struct mm_struct *mm) + } + #endif + +-/* +- * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when +- * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of +- * bits in a bitmap, k is the number of hash functions and n is the number of +- * inserted items. +- * +- * Page table walkers use one of the two filters to reduce their search space. +- * To get rid of non-leaf entries that no longer have enough leaf entries, the +- * aging uses the double-buffering technique to flip to the other filter each +- * time it produces a new generation. For non-leaf entries that have enough +- * leaf entries, the aging carries them over to the next generation in +- * walk_pmd_range(); the eviction also report them when walking the rmap +- * in lru_gen_look_around(). +- * +- * For future optimizations: +- * 1. It's not necessary to keep both filters all the time. The spare one can be +- * freed after the RCU grace period and reallocated if needed again. +- * 2. And when reallocating, it's worth scaling its size according to the number +- * of inserted entries in the other filter, to reduce the memory overhead on +- * small systems and false positives on large systems. +- * 3. Jenkins' hash function is an alternative to Knuth's. 
+- */ +-#define BLOOM_FILTER_SHIFT 15 +- +-static inline int filter_gen_from_seq(unsigned long seq) +-{ +- return seq % NR_BLOOM_FILTERS; +-} +- +-static void get_item_key(void *item, int *key) +-{ +- u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2); +- +- BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32)); +- +- key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1); +- key[1] = hash >> BLOOM_FILTER_SHIFT; +-} +- +-static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq) +-{ +- unsigned long *filter; +- int gen = filter_gen_from_seq(seq); +- +- filter = lruvec->mm_state.filters[gen]; +- if (filter) { +- bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT)); +- return; +- } +- +- filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), +- __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); +- WRITE_ONCE(lruvec->mm_state.filters[gen], filter); +-} +- +-static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +-{ +- int key[2]; +- unsigned long *filter; +- int gen = filter_gen_from_seq(seq); +- +- filter = READ_ONCE(lruvec->mm_state.filters[gen]); +- if (!filter) +- return; +- +- get_item_key(item, key); +- +- if (!test_bit(key[0], filter)) +- set_bit(key[0], filter); +- if (!test_bit(key[1], filter)) +- set_bit(key[1], filter); +-} +- +-static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +-{ +- int key[2]; +- unsigned long *filter; +- int gen = filter_gen_from_seq(seq); +- +- filter = READ_ONCE(lruvec->mm_state.filters[gen]); +- if (!filter) +- return true; +- +- get_item_key(item, key); +- +- return test_bit(key[0], filter) && test_bit(key[1], filter); +-} +- + static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last) + { + int i; +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-15-UPSTREAM-mm-multi-gen-LRU-section-for-memcg-LRU.patch b/target/linux/generic/backport-6.1/020-v6.3-15-UPSTREAM-mm-multi-gen-LRU-section-for-memcg-LRU.patch new file mode 100644 index 0000000000..c26e28c3ca --- /dev/null +++ b/target/linux/generic/backport-6.1/020-v6.3-15-UPSTREAM-mm-multi-gen-LRU-section-for-memcg-LRU.patch @@ -0,0 +1,440 @@ +From 48c916b812652f9453be5bd45a703728926d41ca Mon Sep 17 00:00:00 2001 +From: "T.J. Alumbaugh" +Date: Wed, 18 Jan 2023 00:18:24 +0000 +Subject: [PATCH 15/19] UPSTREAM: mm: multi-gen LRU: section for memcg LRU + +Move memcg LRU code into a dedicated section. Improve the design doc to +outline its architecture. + +Link: https://lkml.kernel.org/r/20230118001827.1040870-5-talumbau@google.com +Change-Id: Id252e420cff7a858acb098cf2b3642da5c40f602 +Signed-off-by: T.J. Alumbaugh +Cc: Yu Zhao +Signed-off-by: Andrew Morton +(cherry picked from commit 36c7b4db7c942ae9e1b111f0c6b468c8b2e33842) +Bug: 274865848 +Signed-off-by: T.J. Mercier +--- + Documentation/mm/multigen_lru.rst | 33 +++- + include/linux/mm_inline.h | 17 -- + include/linux/mmzone.h | 13 +- + mm/memcontrol.c | 8 +- + mm/vmscan.c | 250 +++++++++++++++++------------- + 5 files changed, 178 insertions(+), 143 deletions(-) + +diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst +index 770b5d539856c..5f1f6ecbb79b9 100644 +--- a/Documentation/mm/multigen_lru.rst ++++ b/Documentation/mm/multigen_lru.rst +@@ -186,9 +186,40 @@ is false positive, the cost is an additional scan of a range of PTEs, + which may yield hot pages anyway. Parameters of the filter itself can + control the false positive rate in the limit. + ++Memcg LRU ++--------- ++An memcg LRU is a per-node LRU of memcgs. 
It is also an LRU of LRUs, ++since each node and memcg combination has an LRU of folios (see ++``mem_cgroup_lruvec()``). Its goal is to improve the scalability of ++global reclaim, which is critical to system-wide memory overcommit in ++data centers. Note that memcg LRU only applies to global reclaim. ++ ++The basic structure of an memcg LRU can be understood by an analogy to ++the active/inactive LRU (of folios): ++ ++1. It has the young and the old (generations), i.e., the counterparts ++ to the active and the inactive; ++2. The increment of ``max_seq`` triggers promotion, i.e., the ++ counterpart to activation; ++3. Other events trigger similar operations, e.g., offlining an memcg ++ triggers demotion, i.e., the counterpart to deactivation. ++ ++In terms of global reclaim, it has two distinct features: ++ ++1. Sharding, which allows each thread to start at a random memcg (in ++ the old generation) and improves parallelism; ++2. Eventual fairness, which allows direct reclaim to bail out at will ++ and reduces latency without affecting fairness over some time. ++ ++In terms of traversing memcgs during global reclaim, it improves the ++best-case complexity from O(n) to O(1) and does not affect the ++worst-case complexity O(n). Therefore, on average, it has a sublinear ++complexity. ++ + Summary + ------- +-The multi-gen LRU can be disassembled into the following parts: ++The multi-gen LRU (of folios) can be disassembled into the following ++parts: + + * Generations + * Rmap walks +diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h +index 9a8e2049333c0..5567f4850243b 100644 +--- a/include/linux/mm_inline.h ++++ b/include/linux/mm_inline.h +@@ -122,18 +122,6 @@ static inline bool lru_gen_in_fault(void) + return current->in_lru_fault; + } + +-#ifdef CONFIG_MEMCG +-static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +-{ +- return READ_ONCE(lruvec->lrugen.seg); +-} +-#else +-static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +-{ +- return 0; +-} +-#endif +- + static inline int lru_gen_from_seq(unsigned long seq) + { + return seq % MAX_NR_GENS; +@@ -309,11 +297,6 @@ static inline bool lru_gen_in_fault(void) + return false; + } + +-static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +-{ +- return 0; +-} +- + static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) + { + return false; +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 66e067a635682..403c7461e7a70 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -368,15 +368,6 @@ struct page_vma_mapped_walk; + #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) + #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) + +-/* see the comment on MEMCG_NR_GENS */ +-enum { +- MEMCG_LRU_NOP, +- MEMCG_LRU_HEAD, +- MEMCG_LRU_TAIL, +- MEMCG_LRU_OLD, +- MEMCG_LRU_YOUNG, +-}; +- + #ifdef CONFIG_LRU_GEN + + enum { +@@ -557,7 +548,7 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg); + void lru_gen_online_memcg(struct mem_cgroup *memcg); + void lru_gen_offline_memcg(struct mem_cgroup *memcg); + void lru_gen_release_memcg(struct mem_cgroup *memcg); +-void lru_gen_rotate_memcg(struct lruvec *lruvec, int op); ++void lru_gen_soft_reclaim(struct lruvec *lruvec); + + #else /* !CONFIG_MEMCG */ + +@@ -608,7 +599,7 @@ static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) + { + } + +-static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) ++static inline void lru_gen_soft_reclaim(struct lruvec 
*lruvec) + { + } + +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index 7815d556e38cc..5397aeb43986d 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -478,12 +478,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid) + struct mem_cgroup_tree_per_node *mctz; + + if (lru_gen_enabled()) { +- struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec; +- +- /* see the comment on MEMCG_NR_GENS */ +- if (soft_limit_excess(memcg) && lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD) +- lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD); +- ++ if (soft_limit_excess(memcg)) ++ lru_gen_soft_reclaim(&memcg->nodeinfo[nid]->lruvec); + return; + } + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 74b4f9d660b56..ccde215c084ca 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4689,6 +4689,148 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + mem_cgroup_unlock_pages(); + } + ++/****************************************************************************** ++ * memcg LRU ++ ******************************************************************************/ ++ ++/* see the comment on MEMCG_NR_GENS */ ++enum { ++ MEMCG_LRU_NOP, ++ MEMCG_LRU_HEAD, ++ MEMCG_LRU_TAIL, ++ MEMCG_LRU_OLD, ++ MEMCG_LRU_YOUNG, ++}; ++ ++#ifdef CONFIG_MEMCG ++ ++static int lru_gen_memcg_seg(struct lruvec *lruvec) ++{ ++ return READ_ONCE(lruvec->lrugen.seg); ++} ++ ++static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) ++{ ++ int seg; ++ int old, new; ++ int bin = get_random_u32_below(MEMCG_NR_BINS); ++ struct pglist_data *pgdat = lruvec_pgdat(lruvec); ++ ++ spin_lock(&pgdat->memcg_lru.lock); ++ ++ VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list)); ++ ++ seg = 0; ++ new = old = lruvec->lrugen.gen; ++ ++ /* see the comment on MEMCG_NR_GENS */ ++ if (op == MEMCG_LRU_HEAD) ++ seg = MEMCG_LRU_HEAD; ++ else if (op == MEMCG_LRU_TAIL) ++ seg = MEMCG_LRU_TAIL; ++ else if (op == MEMCG_LRU_OLD) ++ new = get_memcg_gen(pgdat->memcg_lru.seq); ++ else if (op == MEMCG_LRU_YOUNG) ++ new = get_memcg_gen(pgdat->memcg_lru.seq + 1); ++ else ++ VM_WARN_ON_ONCE(true); ++ ++ hlist_nulls_del_rcu(&lruvec->lrugen.list); ++ ++ if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD) ++ hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]); ++ else ++ hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]); ++ ++ pgdat->memcg_lru.nr_memcgs[old]--; ++ pgdat->memcg_lru.nr_memcgs[new]++; ++ ++ lruvec->lrugen.gen = new; ++ WRITE_ONCE(lruvec->lrugen.seg, seg); ++ ++ if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) ++ WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); ++ ++ spin_unlock(&pgdat->memcg_lru.lock); ++} ++ ++void lru_gen_online_memcg(struct mem_cgroup *memcg) ++{ ++ int gen; ++ int nid; ++ int bin = get_random_u32_below(MEMCG_NR_BINS); ++ ++ for_each_node(nid) { ++ struct pglist_data *pgdat = NODE_DATA(nid); ++ struct lruvec *lruvec = get_lruvec(memcg, nid); ++ ++ spin_lock(&pgdat->memcg_lru.lock); ++ ++ VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list)); ++ ++ gen = get_memcg_gen(pgdat->memcg_lru.seq); ++ ++ hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]); ++ pgdat->memcg_lru.nr_memcgs[gen]++; ++ ++ lruvec->lrugen.gen = gen; ++ ++ spin_unlock(&pgdat->memcg_lru.lock); ++ } ++} ++ ++void lru_gen_offline_memcg(struct mem_cgroup *memcg) ++{ ++ int nid; ++ ++ for_each_node(nid) { ++ struct lruvec *lruvec = get_lruvec(memcg, nid); ++ ++ lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD); ++ } ++} 
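The seq-to-generation mapping these helpers rely on is plain modular arithmetic: with two generations, incrementing pgdat->memcg_lru.seq flips which generation is considered old. An illustrative model consistent with the calls above (a sketch, not quoted from this series):

	/* the two memcg generations alternate as seq increments */
	static int get_memcg_gen(unsigned long seq)
	{
		return seq % MEMCG_NR_GENS;	/* assumes MEMCG_NR_GENS == 2 */
	}

	/* reclaimers shard across bins to reduce contention */
	static int get_memcg_bin(int bin)
	{
		return bin % MEMCG_NR_BINS;
	}

This is what keeps promotion and demotion O(1): they only move an lruvec between the per-node lists and update the seq-derived index.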
++ ++void lru_gen_release_memcg(struct mem_cgroup *memcg) ++{ ++ int gen; ++ int nid; ++ ++ for_each_node(nid) { ++ struct pglist_data *pgdat = NODE_DATA(nid); ++ struct lruvec *lruvec = get_lruvec(memcg, nid); ++ ++ spin_lock(&pgdat->memcg_lru.lock); ++ ++ VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list)); ++ ++ gen = lruvec->lrugen.gen; ++ ++ hlist_nulls_del_rcu(&lruvec->lrugen.list); ++ pgdat->memcg_lru.nr_memcgs[gen]--; ++ ++ if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq)) ++ WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); ++ ++ spin_unlock(&pgdat->memcg_lru.lock); ++ } ++} ++ ++void lru_gen_soft_reclaim(struct lruvec *lruvec) ++{ ++ /* see the comment on MEMCG_NR_GENS */ ++ if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD) ++ lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD); ++} ++ ++#else /* !CONFIG_MEMCG */ ++ ++static int lru_gen_memcg_seg(struct lruvec *lruvec) ++{ ++ return 0; ++} ++ ++#endif ++ + /****************************************************************************** + * the eviction + ******************************************************************************/ +@@ -5386,53 +5528,6 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control * + pgdat->kswapd_failures = 0; + } + +-#ifdef CONFIG_MEMCG +-void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) +-{ +- int seg; +- int old, new; +- int bin = get_random_u32_below(MEMCG_NR_BINS); +- struct pglist_data *pgdat = lruvec_pgdat(lruvec); +- +- spin_lock(&pgdat->memcg_lru.lock); +- +- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list)); +- +- seg = 0; +- new = old = lruvec->lrugen.gen; +- +- /* see the comment on MEMCG_NR_GENS */ +- if (op == MEMCG_LRU_HEAD) +- seg = MEMCG_LRU_HEAD; +- else if (op == MEMCG_LRU_TAIL) +- seg = MEMCG_LRU_TAIL; +- else if (op == MEMCG_LRU_OLD) +- new = get_memcg_gen(pgdat->memcg_lru.seq); +- else if (op == MEMCG_LRU_YOUNG) +- new = get_memcg_gen(pgdat->memcg_lru.seq + 1); +- else +- VM_WARN_ON_ONCE(true); +- +- hlist_nulls_del_rcu(&lruvec->lrugen.list); +- +- if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD) +- hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]); +- else +- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]); +- +- pgdat->memcg_lru.nr_memcgs[old]--; +- pgdat->memcg_lru.nr_memcgs[new]++; +- +- lruvec->lrugen.gen = new; +- WRITE_ONCE(lruvec->lrugen.seg, seg); +- +- if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) +- WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); +- +- spin_unlock(&pgdat->memcg_lru.lock); +-} +-#endif +- + /****************************************************************************** + * state change + ******************************************************************************/ +@@ -6078,67 +6173,6 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) + } + } + +-void lru_gen_online_memcg(struct mem_cgroup *memcg) +-{ +- int gen; +- int nid; +- int bin = get_random_u32_below(MEMCG_NR_BINS); +- +- for_each_node(nid) { +- struct pglist_data *pgdat = NODE_DATA(nid); +- struct lruvec *lruvec = get_lruvec(memcg, nid); +- +- spin_lock(&pgdat->memcg_lru.lock); +- +- VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list)); +- +- gen = get_memcg_gen(pgdat->memcg_lru.seq); +- +- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]); +- pgdat->memcg_lru.nr_memcgs[gen]++; +- +- lruvec->lrugen.gen = gen; +- +- spin_unlock(&pgdat->memcg_lru.lock); +- 
} +-} +- +-void lru_gen_offline_memcg(struct mem_cgroup *memcg) +-{ +- int nid; +- +- for_each_node(nid) { +- struct lruvec *lruvec = get_lruvec(memcg, nid); +- +- lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD); +- } +-} +- +-void lru_gen_release_memcg(struct mem_cgroup *memcg) +-{ +- int gen; +- int nid; +- +- for_each_node(nid) { +- struct pglist_data *pgdat = NODE_DATA(nid); +- struct lruvec *lruvec = get_lruvec(memcg, nid); +- +- spin_lock(&pgdat->memcg_lru.lock); +- +- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list)); +- +- gen = lruvec->lrugen.gen; +- +- hlist_nulls_del_rcu(&lruvec->lrugen.list); +- pgdat->memcg_lru.nr_memcgs[gen]--; +- +- if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq)) +- WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); +- +- spin_unlock(&pgdat->memcg_lru.lock); +- } +-} +- + #endif /* CONFIG_MEMCG */ + + static int __init init_lru_gen(void) +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-16-UPSTREAM-mm-multi-gen-LRU-improve-lru_gen_exit_memcg.patch b/target/linux/generic/backport-6.1/020-v6.3-16-UPSTREAM-mm-multi-gen-LRU-improve-lru_gen_exit_memcg.patch new file mode 100644 index 0000000000..1f3f8ffe6d --- /dev/null +++ b/target/linux/generic/backport-6.1/020-v6.3-16-UPSTREAM-mm-multi-gen-LRU-improve-lru_gen_exit_memcg.patch @@ -0,0 +1,45 @@ +From bec433f29537652ed054148edfd7e2183ddcf7c3 Mon Sep 17 00:00:00 2001 +From: "T.J. Alumbaugh" +Date: Wed, 18 Jan 2023 00:18:25 +0000 +Subject: [PATCH 16/19] UPSTREAM: mm: multi-gen LRU: improve + lru_gen_exit_memcg() + +Add warnings and poison ->next. + +Link: https://lkml.kernel.org/r/20230118001827.1040870-6-talumbau@google.com +Change-Id: I53de9e04c1ae941e122b33cd45d2bbb5f34aae0c +Signed-off-by: T.J. Alumbaugh +Cc: Yu Zhao +Signed-off-by: Andrew Morton +(cherry picked from commit 37cc99979d04cca677c0ad5c0acd1149ec165d1b) +Bug: 274865848 +Signed-off-by: T.J. Mercier +--- + mm/vmscan.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index ccde215c084ca..d5d6f8d94f58a 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -6160,12 +6160,17 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) + int i; + int nid; + ++ VM_WARN_ON_ONCE(!list_empty(&memcg->mm_list.fifo)); ++ + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + ++ VM_WARN_ON_ONCE(lruvec->mm_state.nr_walkers); + VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0, + sizeof(lruvec->lrugen.nr_pages))); + ++ lruvec->lrugen.list.next = LIST_POISON1; ++ + for (i = 0; i < NR_BLOOM_FILTERS; i++) { + bitmap_free(lruvec->mm_state.filters[i]); + lruvec->mm_state.filters[i] = NULL; +-- +2.40.1 + diff --git a/target/linux/generic/backport-6.1/020-v6.3-17-UPSTREAM-mm-multi-gen-LRU-improve-walk_pmd_range.patch b/target/linux/generic/backport-6.1/020-v6.3-17-UPSTREAM-mm-multi-gen-LRU-improve-walk_pmd_range.patch new file mode 100644 index 0000000000..02dd920e54 --- /dev/null +++ b/target/linux/generic/backport-6.1/020-v6.3-17-UPSTREAM-mm-multi-gen-LRU-improve-walk_pmd_range.patch @@ -0,0 +1,140 @@ +From fc0e3b06e0f19917b7ecad7967a72f61d4743644 Mon Sep 17 00:00:00 2001 +From: "T.J. Alumbaugh" +Date: Wed, 18 Jan 2023 00:18:26 +0000 +Subject: [PATCH 17/19] UPSTREAM: mm: multi-gen LRU: improve walk_pmd_range() + +Improve readability of walk_pmd_range() and walk_pmd_range_locked(). + +Link: https://lkml.kernel.org/r/20230118001827.1040870-7-talumbau@google.com +Change-Id: Ia084fbf53fe989673b7804ca8ca520af12d7d52a +Signed-off-by: T.J. 
Alumbaugh +Cc: Yu Zhao +Signed-off-by: Andrew Morton +(cherry picked from commit b5ff4133617d0eced35b685da0bd0929dd9fabb7) +Bug: 274865848 +Signed-off-by: T.J. Mercier +--- + mm/vmscan.c | 40 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 20 deletions(-) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index d5d6f8d94f58a..8f496c2e670a9 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -3980,8 +3980,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, + } + + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) +-static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, +- struct mm_walk *args, unsigned long *bitmap, unsigned long *start) ++static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma, ++ struct mm_walk *args, unsigned long *bitmap, unsigned long *first) + { + int i; + pmd_t *pmd; +@@ -3994,18 +3994,19 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area + VM_WARN_ON_ONCE(pud_leaf(*pud)); + + /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ +- if (*start == -1) { +- *start = next; ++ if (*first == -1) { ++ *first = addr; ++ bitmap_zero(bitmap, MIN_LRU_BATCH); + return; + } + +- i = next == -1 ? 0 : pmd_index(next) - pmd_index(*start); ++ i = addr == -1 ? 0 : pmd_index(addr) - pmd_index(*first); + if (i && i <= MIN_LRU_BATCH) { + __set_bit(i - 1, bitmap); + return; + } + +- pmd = pmd_offset(pud, *start); ++ pmd = pmd_offset(pud, *first); + + ptl = pmd_lockptr(args->mm, pmd); + if (!spin_trylock(ptl)) +@@ -4016,15 +4017,16 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area + do { + unsigned long pfn; + struct folio *folio; +- unsigned long addr = i ? (*start & PMD_MASK) + i * PMD_SIZE : *start; ++ ++ /* don't round down the first address */ ++ addr = i ? 
(*first & PMD_MASK) + i * PMD_SIZE : *first;
+ 
+ 		pfn = get_pmd_pfn(pmd[i], vma, addr);
+ 		if (pfn == -1)
+ 			goto next;
+ 
+ 		if (!pmd_trans_huge(pmd[i])) {
+-			if (arch_has_hw_nonleaf_pmd_young() &&
+-			    get_cap(LRU_GEN_NONLEAF_YOUNG))
++			if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
+ 				pmdp_test_and_clear_young(vma, addr, pmd + i);
+ 			goto next;
+ 		}
+@@ -4053,12 +4055,11 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
+ 	arch_leave_lazy_mmu_mode();
+ 	spin_unlock(ptl);
+ done:
+-	*start = -1;
+-	bitmap_zero(bitmap, MIN_LRU_BATCH);
++	*first = -1;
+ }
+ #else
+-static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
+-			struct mm_walk *args, unsigned long *bitmap, unsigned long *start)
++static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
++			struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
+ {
+ }
+ #endif
+@@ -4071,9 +4072,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
+ 	unsigned long next;
+ 	unsigned long addr;
+ 	struct vm_area_struct *vma;
+-	unsigned long pos = -1;
++	unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)];
++	unsigned long first = -1;
+ 	struct lru_gen_mm_walk *walk = args->private;
+-	unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
+ 
+ 	VM_WARN_ON_ONCE(pud_leaf(*pud));
+ 
+@@ -4115,18 +4116,17 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
+ 		if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+ 			continue;
+ 
+-		walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
++		walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
+ 		continue;
+ 	}
+ #endif
+ 	walk->mm_stats[MM_NONLEAF_TOTAL]++;
+ 
+-	if (arch_has_hw_nonleaf_pmd_young() &&
+-	    get_cap(LRU_GEN_NONLEAF_YOUNG)) {
++	if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+ 		if (!pmd_young(val))
+ 			continue;
+ 
+-		walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
++		walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
+ 	}
+ 
+ 	if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
+@@ -4143,7 +4143,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
+ 		update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i);
+ 	}
+ 
+-	walk_pmd_range_locked(pud, -1, vma, args, bitmap, &pos);
++	walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first);
+ 
+ 	if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end))
+ 		goto restart;
+-- 
+2.40.1
+
diff --git a/target/linux/generic/backport-6.1/020-v6.3-18-UPSTREAM-mm-multi-gen-LRU-simplify-lru_gen_look_arou.patch b/target/linux/generic/backport-6.1/020-v6.3-18-UPSTREAM-mm-multi-gen-LRU-simplify-lru_gen_look_arou.patch
new file mode 100644
index 0000000000..caee6b096f
--- /dev/null
+++ b/target/linux/generic/backport-6.1/020-v6.3-18-UPSTREAM-mm-multi-gen-LRU-simplify-lru_gen_look_arou.patch
@@ -0,0 +1,153 @@
+From e604c3ccb4dfbdde2467fccef9bb36170a392695 Mon Sep 17 00:00:00 2001
+From: "T.J. Alumbaugh"
+Date: Wed, 18 Jan 2023 00:18:27 +0000
+Subject: [PATCH 18/19] UPSTREAM: mm: multi-gen LRU: simplify
+ lru_gen_look_around()
+
+Update the folio generation in place with or without
+current->reclaim_state->mm_walk. The LRU lock is held for longer, if
+mm_walk is NULL and the number of folios to update is more than
+PAGEVEC_SIZE.
+
+This causes a measurable regression from the LRU lock contention during a
+microbenchmark.
But a tiny regression is not worth the complexity. + +Link: https://lkml.kernel.org/r/20230118001827.1040870-8-talumbau@google.com +Change-Id: I9ce18b4f4062e6c1c13c98ece9422478eb8e1846 +Signed-off-by: T.J. Alumbaugh +Cc: Yu Zhao +Signed-off-by: Andrew Morton +(cherry picked from commit abf086721a2f1e6897c57796f7268df1b194c750) +Bug: 274865848 +Signed-off-by: T.J. Mercier +--- + mm/vmscan.c | 73 +++++++++++++++++------------------------------------ + 1 file changed, 23 insertions(+), 50 deletions(-) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 8f496c2e670a9..f6ce7a1fd78a3 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4571,13 +4571,12 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) + void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + { + int i; +- pte_t *pte; + unsigned long start; + unsigned long end; +- unsigned long addr; + struct lru_gen_mm_walk *walk; + int young = 0; +- unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {}; ++ pte_t *pte = pvmw->pte; ++ unsigned long addr = pvmw->address; + struct folio *folio = pfn_folio(pvmw->pfn); + struct mem_cgroup *memcg = folio_memcg(folio); + struct pglist_data *pgdat = folio_pgdat(folio); +@@ -4594,25 +4593,28 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + /* avoid taking the LRU lock under the PTL when possible */ + walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL; + +- start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start); +- end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1; ++ start = max(addr & PMD_MASK, pvmw->vma->vm_start); ++ end = min(addr | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1; + + if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { +- if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2) ++ if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2) + end = start + MIN_LRU_BATCH * PAGE_SIZE; +- else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2) ++ else if (end - addr < MIN_LRU_BATCH * PAGE_SIZE / 2) + start = end - MIN_LRU_BATCH * PAGE_SIZE; + else { +- start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2; +- end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2; ++ start = addr - MIN_LRU_BATCH * PAGE_SIZE / 2; ++ end = addr + MIN_LRU_BATCH * PAGE_SIZE / 2; + } + } + +- pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE; ++ /* folio_update_gen() requires stable folio_memcg() */ ++ if (!mem_cgroup_trylock_pages(memcg)) ++ return; + +- rcu_read_lock(); + arch_enter_lazy_mmu_mode(); + ++ pte -= (addr - start) / PAGE_SIZE; ++ + for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) { + unsigned long pfn; + +@@ -4637,56 +4639,27 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) + !folio_test_swapcache(folio))) + folio_mark_dirty(folio); + ++ if (walk) { ++ old_gen = folio_update_gen(folio, new_gen); ++ if (old_gen >= 0 && old_gen != new_gen) ++ update_batch_size(walk, folio, old_gen, new_gen); ++ ++ continue; ++ } ++ + old_gen = folio_lru_gen(folio); + if (old_gen < 0) + folio_set_referenced(folio); + else if (old_gen != new_gen) +- __set_bit(i, bitmap); ++ folio_activate(folio); + } + + arch_leave_lazy_mmu_mode(); +- rcu_read_unlock(); ++ mem_cgroup_unlock_pages(); + + /* feedback from rmap walkers to page table walkers */ + if (suitable_to_scan(i, young)) + update_bloom_filter(lruvec, max_seq, pvmw->pmd); +- +- if (!walk && bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) { +- for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { +- folio = pfn_folio(pte_pfn(pte[i])); +- folio_activate(folio); +- } +- 
return; +- } +- +- /* folio_update_gen() requires stable folio_memcg() */ +- if (!mem_cgroup_trylock_pages(memcg)) +- return; +- +- if (!walk) { +- spin_lock_irq(&lruvec->lru_lock); +- new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq); +- } +- +- for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { +- folio = pfn_folio(pte_pfn(pte[i])); +- if (folio_memcg_rcu(folio) != memcg) +- continue; +- +- old_gen = folio_update_gen(folio, new_gen); +- if (old_gen < 0 || old_gen == new_gen) +- continue; +- +- if (walk) +- update_batch_size(walk, folio, old_gen, new_gen); +- else +- lru_gen_update_size(lruvec, folio, old_gen, new_gen); +- } +- +- if (!walk) +- spin_unlock_irq(&lruvec->lru_lock); +- +- mem_cgroup_unlock_pages(); + } + + /****************************************************************************** +-- +2.40.1 + -- 2.30.2
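For reference, the POSIX_FADV_NOREUSE support restored by patch 11 above (now that FMODE_NOREUSE no longer collides with FMODE_CAN_ODIRECT) is reachable from userspace through plain posix_fadvise(). A minimal sketch; the file path is illustrative:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/var/log/big.log", O_RDONLY);	/* illustrative path */
		int err;

		if (fd < 0)
			return 1;

		/* len == 0 applies the advice from offset to EOF */
		err = posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
		if (err)
			fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

		/*
		 * ... stream through the file once; MGLRU can now ignore
		 * accesses through mappings of this file when judging
		 * recency (see vma_has_recency()) ...
		 */
		close(fd);
		return 0;
	}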