--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
-@@ -333,4 +333,13 @@ static __always_inline void del_page_fro
- update_lru_size(lruvec, page_lru(page), page_zonenum(page),
- -thp_nr_pages(page));
+@@ -606,5 +606,13 @@ static __always_inline void del_page_fro
+ make_pte_marker(PTE_MARKER_UFFD_WP));
+ #endif
}
+
+static inline bool vma_has_recency(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
+ return false;
+
+ return true;
+}
-+
+
#endif
--- a/mm/memory.c
+++ b/mm/memory.c
-@@ -41,6 +41,7 @@
-
- #include <linux/kernel_stat.h>
- #include <linux/mm.h>
-+#include <linux/mm_inline.h>
- #include <linux/sched/mm.h>
- #include <linux/sched/coredump.h>
- #include <linux/sched/numa_balancing.h>
@@ -1353,8 +1354,7 @@ again:
force_flush = 1;
set_page_dirty(page);
- /*
- * Don't treat a reference through
- * a sequentially read mapping as such.
-- * If the page has been used in another mapping,
+- * If the folio has been used in another mapping,
- * we will catch it; if this other mapping is
- * already gone, the unmap path will have set
-- * PG_referenced or activated the page.
+- * the referenced flag or activated the folio.
- */
- if (likely(!(vma->vm_flags & VM_SEQ_READ)))
- referenced++;
return false;
@@ -876,6 +878,7 @@ int page_referenced(struct page *page,
- .rmap_one = page_referenced_one,
.arg = (void *)&pra,
- .anon_lock = page_lock_anon_vma_read,
-+ .invalid_vma = invalid_page_referenced_vma,
+ .anon_lock = folio_lock_anon_vma_read,
+ .try_lock = true,
++ .invalid_vma = invalid_folio_referenced_vma,
};
*vm_flags = 0;
- * cgroups
- */
- if (memcg) {
-- rwc.invalid_vma = invalid_page_referenced_vma;
+- rwc.invalid_vma = invalid_folio_referenced_vma;
- }
-
- rmap_walk(page, &rwc);
+ rmap_walk(folio, &rwc);
*vm_flags = pra.vm_flags;
--- a/mm/vmscan.c
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:18:59 -0700
Subject: [PATCH 21/29] mm: multi-gen LRU: rename lru_gen_struct to
- lru_gen_page
+ lru_gen_folio
Patch series "mm: multi-gen LRU: memcg LRU", v3.
Overview
========
An memcg LRU is a per-node LRU of memcgs. It is also an LRU of LRUs,
-since each node and memcg combination has an LRU of pages (see
+since each node and memcg combination has an LRU of folios (see
mem_cgroup_lruvec()).
Its goal is to improve the scalability of global reclaim, which can then
select the next memcg to reclaim from in amortized constant complexity,
in contrast to the current linear complexity of iterating over all
memcgs.
The basic structure of an memcg LRU can be understood by an analogy to
-the active/inactive LRU (of pages):
+the active/inactive LRU (of folios):
1. It has the young and the old (generations), i.e., the counterparts
to the active and the inactive;
2. The increment of max_seq triggers promotion, i.e., the counterpart
   to activation;
3. Other events trigger similar operations, e.g., offlining an memcg
   triggers demotion, i.e., the counterpart to deactivation.
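As an illustration only, here is a compilable user-space sketch of that
analogy; the names (memcg_model, memcg_seq, memcg_promote) are invented
for the model and are not the kernel's:

  #include <stdio.h>

  struct memcg_model {
          unsigned long gen;      /* the memcg generation this memcg is in */
  };

  static unsigned long memcg_seq; /* the per-node memcg generation counter */

  /* promotion: the counterpart to activation */
  static void memcg_promote(struct memcg_model *memcg)
  {
          memcg->gen = memcg_seq;
  }

  /* incrementing the seq demotes every memcg at once */
  static void memcg_inc_seq(void)
  {
          memcg_seq++;
  }

  int main(void)
  {
          struct memcg_model m = { .gen = 0 };

          memcg_promote(&m);
          printf("young %d\n", m.gen == memcg_seq);       /* 1 */
          memcg_inc_seq();
          printf("young %d\n", m.gen == memcg_seq);       /* 0 */
          return 0;
  }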
This patch (of 8):
-The new name lru_gen_page will be more distinct from the coming
+The new name lru_gen_folio will be more distinct from the coming
lru_gen_memcg.
Link: https://lkml.kernel.org/r/20221222041905.2431096-1-yuzhao@google.com
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -168,7 +168,7 @@ static inline void lru_gen_update_size(s
- int zone = page_zonenum(page);
- int delta = thp_nr_pages(page);
+ int zone = folio_zonenum(folio);
+ int delta = folio_nr_pages(folio);
enum lru_list lru = type * LRU_INACTIVE_FILE;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
-@@ -214,7 +214,7 @@ static inline bool lru_gen_add_page(stru
- int gen = page_lru_gen(page);
- int type = page_is_file_lru(page);
- int zone = page_zonenum(page);
+@@ -214,7 +214,7 @@ static inline bool lru_gen_add_folio(stru
+ int gen = folio_lru_gen(folio);
+ int type = folio_is_file_lru(folio);
+ int zone = folio_zonenum(folio);
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
- VM_WARN_ON_ONCE_PAGE(gen != -1, page);
+ VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -394,7 +394,7 @@ enum {
- * The number of pages in each generation is eventually consistent and therefore
+ * The number of folios in each generation is eventually consistent and therefore
* can be transiently negative when reset_batch_size() is pending.
*/
-struct lru_gen_struct {
-+struct lru_gen_page {
++struct lru_gen_folio {
/* the aging increments the youngest generation number */
unsigned long max_seq;
/* the eviction increments the oldest generation numbers */
/* the lruvec under reclaim */
struct lruvec *lruvec;
- /* unstable max_seq from lru_gen_struct */
-+ /* unstable max_seq from lru_gen_page */
++ /* unstable max_seq from lru_gen_folio */
unsigned long max_seq;
/* the next address within an mm to scan */
unsigned long next_addr;
#ifdef CONFIG_LRU_GEN
/* evictable pages divided into generations */
- struct lru_gen_struct lrugen;
-+ struct lru_gen_page lrugen;
++ struct lru_gen_folio lrugen;
/* to concurrently iterate lru_gen_mm_list */
struct lru_gen_mm_state mm_state;
#endif
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
{
- /* see the comment on lru_gen_struct */
-+ /* see the comment on lru_gen_page */
++ /* see the comment on lru_gen_folio */
return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS &&
get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) &&
get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
struct ctrl_pos *pos)
{
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
pos->refaulted = lrugen->avg_refaulted[type][tier] +
{
int hist, tier;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
-@@ -3408,7 +3408,7 @@ static int page_update_gen(struct page *
- static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
+@@ -3408,7 +3408,7 @@ static int folio_update_gen(struct folio *
+ static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
{
- int type = page_is_file_lru(page);
+ int type = folio_is_file_lru(folio);
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
- unsigned long new_flags, old_flags = READ_ONCE(page->flags);
+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
@@ -3453,7 +3453,7 @@ static void update_batch_size(struct lru
static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
{
int gen, type, zone;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
walk->batched = 0;
int zone;
int remaining = MAX_LRU_BATCH;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
if (type == LRU_GEN_ANON && !can_swap)
int gen, type, zone;
bool success = false;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
DEFINE_MIN_SEQ(lruvec);
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
}
- /* see the comment on lru_gen_struct */
-+ /* see the comment on lru_gen_page */
++ /* see the comment on lru_gen_folio */
if (can_swap) {
min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
int prev, next;
int type, zone;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
spin_lock_irq(&lruvec->lru_lock);
struct lru_gen_mm_walk *walk;
struct mm_struct *mm = NULL;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
unsigned long young = 0;
unsigned long total = 0;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
for (type = !can_swap; type < ANON_AND_FILE; type++) {
-@@ -4466,7 +4466,7 @@ static bool sort_page(struct lruvec *lru
- int delta = thp_nr_pages(page);
- int refs = page_lru_refs(page);
+@@ -4466,7 +4466,7 @@ static bool sort_folio(struct lruvec *lru
+ int delta = folio_nr_pages(folio);
+ int refs = folio_lru_refs(folio);
int tier = lru_tier_from_refs(refs);
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
- VM_WARN_ON_ONCE_PAGE(gen >= MAX_NR_GENS, page);
+ VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
-@@ -4566,7 +4566,7 @@ static int scan_pages(struct lruvec *lru
+@@ -4566,7 +4566,7 @@ static int scan_folios(struct lruvec *lru
int scanned = 0;
int isolated = 0;
int remaining = MAX_LRU_BATCH;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
VM_WARN_ON_ONCE(!list_empty(list));
static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
{
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
if (lrugen->enabled) {
enum lru_list lru;
int type, tier;
int hist = lru_hist_from_seq(seq);
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
seq_printf(m, " %10d", tier);
bool full = !debugfs_real_fops(m->file)->write;
struct lruvec *lruvec = v;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
int nid = lruvec_pgdat(lruvec)->node_id;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
int i;
int gen, type, zone;
- struct lru_gen_struct *lrugen = &lruvec->lrugen;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
lrugen->max_seq = MIN_NR_GENS + 1;
lrugen->enabled = lru_gen_enabled();
unsigned long min_seq;
struct lruvec *lruvec;
- struct lru_gen_struct *lrugen;
-+ struct lru_gen_page *lrugen;
- int type = page_is_file_lru(page);
- int delta = thp_nr_pages(page);
- int refs = page_lru_refs(page);
-@@ -252,7 +252,7 @@ static void lru_gen_refault(struct page
++ struct lru_gen_folio *lrugen;
+ int type = folio_is_file_lru(folio);
+ int delta = folio_nr_pages(folio);
+ int refs = folio_lru_refs(folio);
+@@ -252,7 +252,7 @@ static void lru_gen_refault(struct folio
unsigned long token;
unsigned long min_seq;
struct lruvec *lruvec;
- struct lru_gen_struct *lrugen;
-+ struct lru_gen_page *lrugen;
++ struct lru_gen_folio *lrugen;
struct mem_cgroup *memcg;
struct pglist_data *pgdat;
- int type = page_is_file_lru(page);
+ int type = folio_is_file_lru(folio);
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:00 -0700
Subject: [PATCH 22/29] mm: multi-gen LRU: rename lrugen->lists[] to
- lrugen->pages[]
+ lrugen->folios[]
-lru_gen_page will be chained into per-node lists by the coming
+lru_gen_folio will be chained into per-node lists by the coming
lrugen->list.
Link: https://lkml.kernel.org/r/20221222041905.2431096-3-yuzhao@google.com
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
-@@ -246,9 +246,9 @@ static inline bool lru_gen_add_page(stru
- lru_gen_update_size(lruvec, page, -1, gen);
- /* for rotate_reclaimable_page() */
+@@ -246,9 +246,9 @@ static inline bool lru_gen_add_folio(stru
+ lru_gen_update_size(lruvec, folio, -1, gen);
+ /* for folio_rotate_reclaimable() */
if (reclaiming)
-- list_add_tail(&page->lru, &lrugen->lists[gen][type][zone]);
-+ list_add_tail(&page->lru, &lrugen->pages[gen][type][zone]);
+- list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]);
++ list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
else
-- list_add(&page->lru, &lrugen->lists[gen][type][zone]);
-+ list_add(&page->lru, &lrugen->pages[gen][type][zone]);
+- list_add(&folio->lru, &lrugen->lists[gen][type][zone]);
++ list_add(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
}
@@ -302,7 +302,7 @@ enum lruvec_flags {
* They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An
* offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the
- * corresponding generation. The gen counter in page->flags stores gen+1 while
+ * corresponding generation. The gen counter in folio->flags stores gen+1 while
- * a page is on one of lrugen->lists[]. Otherwise it stores 0.
-+ * a page is on one of lrugen->pages[]. Otherwise it stores 0.
++ * a folio is on one of lrugen->folios[]. Otherwise it stores 0.
*
- * A page is added to the youngest generation on faulting. The aging needs to
- * check the accessed bit at least twice before handing this page over to the
+ * A folio is added to the youngest generation on faulting. The aging needs to
+ * check the accessed bit at least twice before handing this folio over to the
@@ -314,8 +314,8 @@ enum lruvec_flags {
* rest of generations, if they exist, are considered inactive. See
* lru_gen_is_active().
*
- * PG_active is always cleared while a page is on one of lrugen->lists[] so that
- * the aging needs not to worry about it. And it's set again when a page
-+ * PG_active is always cleared while a page is on one of lrugen->pages[] so
++ * PG_active is always cleared while a folio is on one of lrugen->folios[] so
+ * that the aging needs not to worry about it. And it's set again when a folio
* considered active is isolated for non-reclaiming purposes, e.g., migration.
- * See lru_gen_add_page() and lru_gen_del_page().
+ * See lru_gen_add_folio() and lru_gen_del_folio().
*
-@@ -402,7 +402,7 @@ struct lru_gen_page {
+@@ -402,7 +402,7 @@ struct lru_gen_folio {
/* the birth time of each generation in jiffies */
unsigned long timestamps[MAX_NR_GENS];
/* the multi-gen LRU lists, lazily sorted on eviction */
- struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
-+ struct list_head pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
++ struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
/* the multi-gen LRU sizes, eventually consistent */
long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
/* the exponential moving average of refaulted */
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
/* prevent cold/hot inversion if force_scan is true */
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
- struct list_head *head = &lrugen->lists[old_gen][type][zone];
-+ struct list_head *head = &lrugen->pages[old_gen][type][zone];
++ struct list_head *head = &lrugen->folios[old_gen][type][zone];
while (!list_empty(head)) {
- struct page *page = lru_to_page(head);
+ struct folio *folio = lru_to_folio(head);
@@ -3998,7 +3998,7 @@ static bool inc_min_seq(struct lruvec *l
- VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
- new_gen = page_inc_gen(lruvec, page, false);
-- list_move_tail(&page->lru, &lrugen->lists[new_gen][type][zone]);
-+ list_move_tail(&page->lru, &lrugen->pages[new_gen][type][zone]);
+ new_gen = folio_inc_gen(lruvec, folio, false);
+- list_move_tail(&folio->lru, &lrugen->lists[new_gen][type][zone]);
++ list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]);
if (!--remaining)
return false;
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
- if (!list_empty(&lrugen->lists[gen][type][zone]))
-+ if (!list_empty(&lrugen->pages[gen][type][zone]))
++ if (!list_empty(&lrugen->folios[gen][type][zone]))
goto next;
}
-@@ -4491,7 +4491,7 @@ static bool sort_page(struct lruvec *lru
+@@ -4491,7 +4491,7 @@ static bool sort_folio(struct lruvec *lru
/* promoted */
if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
-- list_move(&page->lru, &lrugen->lists[gen][type][zone]);
-+ list_move(&page->lru, &lrugen->pages[gen][type][zone]);
+- list_move(&folio->lru, &lrugen->lists[gen][type][zone]);
++ list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
}
-@@ -4500,7 +4500,7 @@ static bool sort_page(struct lruvec *lru
+@@ -4500,7 +4500,7 @@ static bool sort_folio(struct lruvec *lru
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
- gen = page_inc_gen(lruvec, page, false);
-- list_move_tail(&page->lru, &lrugen->lists[gen][type][zone]);
-+ list_move_tail(&page->lru, &lrugen->pages[gen][type][zone]);
+ gen = folio_inc_gen(lruvec, folio, false);
+- list_move_tail(&folio->lru, &lrugen->lists[gen][type][zone]);
++ list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
lrugen->protected[hist][type][tier - 1] + delta);
-@@ -4512,7 +4512,7 @@ static bool sort_page(struct lruvec *lru
- if (PageLocked(page) || PageWriteback(page) ||
- (type == LRU_GEN_FILE && PageDirty(page))) {
- gen = page_inc_gen(lruvec, page, true);
-- list_move(&page->lru, &lrugen->lists[gen][type][zone]);
-+ list_move(&page->lru, &lrugen->pages[gen][type][zone]);
+@@ -4512,7 +4512,7 @@ static bool sort_folio(struct lruvec *lru
+ if (folio_test_locked(folio) || folio_test_writeback(folio) ||
+ (type == LRU_GEN_FILE && folio_test_dirty(folio))) {
+ gen = folio_inc_gen(lruvec, folio, true);
+- list_move(&folio->lru, &lrugen->lists[gen][type][zone]);
++ list_move(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
}
-@@ -4579,7 +4579,7 @@ static int scan_pages(struct lruvec *lru
+@@ -4579,7 +4579,7 @@ static int scan_folios(struct lruvec *lru
for (zone = sc->reclaim_idx; zone >= 0; zone--) {
LIST_HEAD(moved);
int skipped = 0;
- struct list_head *head = &lrugen->lists[gen][type][zone];
-+ struct list_head *head = &lrugen->pages[gen][type][zone];
++ struct list_head *head = &lrugen->folios[gen][type][zone];
while (!list_empty(head)) {
- struct page *page = lru_to_page(head);
+ struct folio *folio = lru_to_folio(head);
@@ -4980,7 +4980,7 @@ static bool __maybe_unused state_is_vali
int gen, type, zone;
for_each_gen_type_zone(gen, type, zone) {
- if (!list_empty(&lrugen->lists[gen][type][zone]))
-+ if (!list_empty(&lrugen->pages[gen][type][zone]))
++ if (!list_empty(&lrugen->folios[gen][type][zone]))
return false;
}
}
for_each_gen_type_zone(gen, type, zone) {
- struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
-+ struct list_head *head = &lruvec->lrugen.pages[gen][type][zone];
++ struct list_head *head = &lruvec->lrugen.folios[gen][type][zone];
while (!list_empty(head)) {
bool success;
for_each_gen_type_zone(gen, type, zone)
- INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
-+ INIT_LIST_HEAD(&lrugen->pages[gen][type][zone]);
++ INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);
lruvec->mm_state.seq = MIN_NR_GENS;
init_waitqueue_head(&lruvec->mm_state.wait);
Subject: [PATCH 23/29] mm: multi-gen LRU: remove eviction fairness safeguard
Recall that the eviction consumes the oldest generation: first it
-bucket-sorts pages whose gen counters were updated by the aging and
+bucket-sorts folios whose gen counters were updated by the aging and
reclaims the rest; then it increments lrugen->min_seq.
The current eviction fairness safeguard for global reclaim has a
dilemma: when there are multiple eligible memcgs, should it continue or
stop upon meeting the reclaim goal? If it continues, it overshoots and
increases direct reclaim latency; if it stops, it loses fairness between
memcgs it has taken care of and memcgs it has left out.
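A toy, compilable model of the recap above (not the kernel's
evict_folios(); the real code only advances min_seq once a generation
is drained): folios whose gen counters the aging has since bumped are
merely re-bucketed, the rest are reclaimed, and min_seq then advances.

  #include <stdio.h>

  struct folio_model {
          unsigned long gen;      /* updated by the aging */
  };

  static unsigned long min_seq;   /* the oldest generation number */

  static int evict_oldest(struct folio_model *folios, int n)
  {
          int reclaimed = 0;

          for (int i = 0; i < n; i++) {
                  if (folios[i].gen != min_seq)
                          continue;       /* sorted into a younger generation */
                  reclaimed++;            /* still cold: reclaim it */
          }
          min_seq++;                      /* the oldest generation is consumed */
          return reclaimed;
  }

  int main(void)
  {
          struct folio_model folios[] = { { 0 }, { 1 }, { 0 } };

          printf("reclaimed %d\n", evict_oldest(folios, 3));      /* 2 */
          return 0;
  }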
static bool writeback_throttling_sane(struct scan_control *sc)
{
return true;
-@@ -4722,8 +4732,7 @@ static int isolate_pages(struct lruvec *
+@@ -4722,8 +4732,7 @@ static int isolate_folios(struct lruvec *
return scanned;
}
--static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
-- bool *need_swapping)
-+static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
+-static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
+- bool *need_swapping)
++static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
{
int type;
int scanned;
return scanned;
}
-@@ -4853,68 +4859,26 @@ done:
+@@ -4853,67 +4859,26 @@ done:
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
}
- DEFINE_MAX_SEQ(lruvec);
-
- if (!current_is_kswapd()) {
-- /* age each memcg once to ensure fairness */
+- /* age each memcg at most once to ensure fairness */
- if (max_seq - seq > 1)
- return true;
-
-
- /*
- * A minimum amount of work was done under global memory pressure. For
-- * kswapd, it may be overshooting. For direct reclaim, the target isn't
-- * met, and yet the allocation may still succeed, since kswapd may have
-- * caught up. In either case, it's better to stop now, and restart if
-- * necessary.
+- * kswapd, it may be overshooting. For direct reclaim, the allocation
+- * may succeed if all suitable zones are somewhat safe. In either case,
+- * it's better to stop now, and restart later if necessary.
- */
- for (i = 0; i <= sc->reclaim_idx; i++) {
- unsigned long wmark;
if (!nr_to_scan)
goto done;
-- delta = evict_pages(lruvec, sc, swappiness, &need_swapping);
-+ delta = evict_pages(lruvec, sc, swappiness);
+- delta = evict_folios(lruvec, sc, swappiness, &need_swapping);
++ delta = evict_folios(lruvec, sc, swappiness);
if (!delta)
goto done;
if (sc->nr_reclaimed >= nr_to_reclaim)
return 0;
-- if (!evict_pages(lruvec, sc, swappiness, NULL))
-+ if (!evict_pages(lruvec, sc, swappiness))
+- if (!evict_folios(lruvec, sc, swappiness, NULL))
++ if (!evict_folios(lruvec, sc, swappiness))
return 0;
cond_resched();
Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard
Recall that the aging produces the youngest generation: first it scans
-for accessed pages and updates their gen counters; then it increments
+for accessed folios and updates their gen counters; then it increments
lrugen->max_seq.
The current aging fairness safeguard for kswapd uses two passes to
ensure the fairness to multiple eligible memcgs. On the first pass,
which is shared with the eviction, it checks whether all eligible
-memcgs are low on cold pages. If so, it requires a second pass, on
+memcgs are low on cold folios. If so, it requires a second pass, on
which it ages all those memcgs at the same time.
With memcg LRU, the aging, while ensuring eventual fairness, will run
when necessary. Therefore it doesn't need the two-pass fairness
safeguard.
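A minimal sketch of the gating that replaces the safeguard, mirroring
the min_seq/max_seq check visible in the hunks below (not the kernel's
full should_run_aging(), which also weighs the young/old ratio): the
aging runs only when a lruvec is about to run out of cold generations.

  #include <stdbool.h>
  #include <stdio.h>

  #define MIN_NR_GENS 2

  /* whether this lruvec is completely out of cold folios */
  static bool need_aging(unsigned long max_seq, unsigned long min_seq)
  {
          return min_seq + MIN_NR_GENS > max_seq;
  }

  int main(void)
  {
          printf("%d\n", need_aging(3, 2));       /* 1: produce a new generation */
          printf("%d\n", need_aging(4, 1));       /* 0: cold generations remain */
          return 0;
  }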
int gen, type, zone;
@@ -4193,6 +4192,13 @@ static bool should_run_aging(struct lruv
unsigned long total = 0;
- struct lru_gen_page *lrugen = &lruvec->lrugen;
+ struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ DEFINE_MIN_SEQ(lruvec);
+
-+ /* whether this lruvec is completely out of cold pages */
++ /* whether this lruvec is completely out of cold folios */
+ if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
+ *nr_to_scan = 0;
+ return true;
+ int gen, type, zone;
+ unsigned long total = 0;
+ bool can_swap = get_swappiness(lruvec, sc);
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
DEFINE_MIN_SEQ(lruvec);
- if (!nr_to_scan && sc->priority != DEF_PRIORITY)
- return false;
- }
-+ /* see the comment on lru_gen_page */
++ /* see the comment on lru_gen_folio */
+ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+ birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
- return nr_to_scan;
-done:
- return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
-+ /* skip this lruvec as it's low on cold pages */
++ /* skip this lruvec as it's low on cold folios */
+ return 0;
}
- goto done;
+ break;
- delta = evict_pages(lruvec, sc, swappiness);
+ delta = evict_folios(lruvec, sc, swappiness);
if (!delta)
- goto done;
+ break;
- unsigned long old = 0;
- unsigned long young = 0;
- unsigned long total = 0;
-- struct lru_gen_page *lrugen = &lruvec->lrugen;
+- struct lru_gen_folio *lrugen = &lruvec->lrugen;
- struct mem_cgroup *memcg = lruvec_memcg(lruvec);
- DEFINE_MIN_SEQ(lruvec);
-
-- /* whether this lruvec is completely out of cold pages */
+- /* whether this lruvec is completely out of cold folios */
- if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
- *nr_to_scan = 0;
- return true;
+ unsigned long old = 0;
+ unsigned long young = 0;
+ unsigned long total = 0;
-+ struct lru_gen_page *lrugen = &lruvec->lrugen;
++ struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ DEFINE_MIN_SEQ(lruvec);
+
-+ /* whether this lruvec is completely out of cold pages */
++ /* whether this lruvec is completely out of cold folios */
+ if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
+ *nr_to_scan = 0;
+ return true;
From fa6363828d314e837c5f79e97ea5e8c0d2f7f062 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:04 -0700
-Subject: [PATCH 26/29] mm: multi-gen LRU: per-node lru_gen_page lists
+Subject: [PATCH 26/29] mm: multi-gen LRU: per-node lru_gen_folio lists
For each node, memcgs are divided into two generations: the old and
the young. For each generation, memcgs are randomly sharded into
multiple bins to improve scalability. For each bin, an RCU hlist_nulls
is virtually divided into three segments: the head, the tail and the
default.
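A toy, compilable model of that sharding (the lru_gen_memcg fifo added
below uses hlist_nulls heads and a random bin pick; plain counters and
rand() stand in here):

  #include <stdio.h>
  #include <stdlib.h>

  #define MEMCG_NR_GENS 2
  #define MEMCG_NR_BINS 4

  struct memcg_lru_model {
          unsigned long seq;      /* the per-node memcg generation counter */
          int nr_memcgs[MEMCG_NR_GENS];
          int fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
  };

  /* an onlined memcg joins the current generation, in a random bin */
  static void memcg_lru_add(struct memcg_lru_model *lru)
  {
          int gen = lru->seq % MEMCG_NR_GENS;
          int bin = rand() % MEMCG_NR_BINS;

          lru->fifo[gen][bin]++;
          lru->nr_memcgs[gen]++;
  }

  int main(void)
  {
          struct memcg_lru_model lru = { 0 };

          for (int i = 0; i < 16; i++)
                  memcg_lru_add(&lru);
          printf("gen 0 holds %d memcgs\n", lru.nr_memcgs[0]);
          return 0;
  }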
include/linux/mm_inline.h | 17 ++
include/linux/mmzone.h | 117 +++++++++++-
mm/memcontrol.c | 16 ++
mm/page_alloc.c | 1 +
mm/vmscan.c | 373 +++++++++++++++++++++++++++++++++----
6 files changed, 499 insertions(+), 35 deletions(-)
+ return 0;
+}
+
- static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming)
+ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
{
return false;
--- a/include/linux/mmzone.h
#include <linux/wait.h>
#include <linux/bitops.h>
#include <linux/cache.h>
@@ -357,6 +358,15 @@ struct page_vma_mapped_walk;
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
#ifdef CONFIG_LRU_GEN
enum {
-@@ -416,6 +426,14 @@ struct lru_gen_page {
+@@ -416,6 +426,14 @@ struct lru_gen_folio {
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
/* whether the multi-gen LRU is enabled */
bool enabled;
+#ifdef CONFIG_MEMCG
-+ /* the memcg generation this lru_gen_page belongs to */
++ /* the memcg generation this lru_gen_folio belongs to */
+ u8 gen;
-+ /* the list segment this lru_gen_page belongs to */
++ /* the list segment this lru_gen_folio belongs to */
+ u8 seg;
-+ /* per-node lru_gen_page list for global reclaim */
++ /* per-node lru_gen_folio list for global reclaim */
+ struct hlist_nulls_node list;
+#endif
};
+struct lru_gen_memcg {
+ /* the per-node memcg generation counter */
+ unsigned long seq;
-+ /* each memcg has one lru_gen_page per node */
++ /* each memcg has one lru_gen_folio per node */
+ unsigned long nr_memcgs[MEMCG_NR_GENS];
-+ /* per-node lru_gen_page list for global reclaim */
++ /* per-node lru_gen_folio list for global reclaim */
+ struct hlist_nulls_head fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
+ /* protects the above */
+ spinlock_t lock;
#ifdef CONFIG_LRU_GEN
/* kswap mm walk data */
struct lru_gen_mm_walk mm_walk;
-+ /* lru_gen_page list */
++ /* lru_gen_folio list */
+ struct lru_gen_memcg memcg_lru;
#endif
struct mem_cgroup_tree_per_node *mctz;
+ if (lru_gen_enabled()) {
-+ struct lruvec *lruvec = &mem_cgroup_page_nodeinfo(memcg, page)->lruvec;
++ struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
+
+ /* see the comment on MEMCG_NR_GENS */
+ if (soft_limit_excess(memcg) && lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
+ return;
+ }
+
- mctz = soft_limit_tree_from_page(page);
+ mctz = soft_limit_tree.rb_tree_per_node[nid];
if (!mctz)
return;
@@ -3433,6 +3443,9 @@ unsigned long mem_cgroup_soft_limit_recl
- try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
-
- /* skip this lruvec as it's low on cold pages */
+ /* skip this lruvec as it's low on cold folios */
- return 0;
+ return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
}
+ if (nr_to_scan <= 0)
break;
- delta = evict_pages(lruvec, sc, swappiness);
+ delta = evict_folios(lruvec, sc, swappiness);
@@ -4912,10 +4895,250 @@ static void lru_gen_shrink_lruvec(struct
cond_resched();
}
+ int bin;
+ int first_bin;
+ struct lruvec *lruvec;
-+ struct lru_gen_page *lrugen;
++ struct lru_gen_folio *lrugen;
+ const struct hlist_nulls_node *pos;
+ int op = 0;
+ struct mem_cgroup *memcg = NULL;
opportunistically skip the aging path, since it is considered less
latency sensitive.
3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
- swappiness to prioritize file LRU, since clean file pages are more
+ swappiness to prioritize file LRU, since clean file folios are more
likely to exist.
-4. sc->may_writepage and sc->may_unmap, which indicates opportunistic
- reclaim, are rejected, since unmapped clean pages are already
+4. sc->may_writepage and sc->may_unmap, which indicates opportunistic
+ reclaim, are rejected, since unmapped clean folios are already
prioritized. Scanning for more of them is likely futile and can
cause high reclaim latency when there is a large number of memcgs.
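For point 3, a compilable sketch of the swappiness clamp the hunks
below add (the __GFP_IO value here is illustrative only; the real flag
comes from the gfp headers):

  #include <stdio.h>

  #define __GFP_IO 0x40u  /* illustrative value only */

  static int clamp_swappiness(int swappiness, unsigned int gfp_mask)
  {
          /* clean file folios are more likely to exist */
          if (swappiness && !(gfp_mask & __GFP_IO))
                  swappiness = 1;
          return swappiness;
  }

  int main(void)
  {
          printf("%d\n", clamp_swappiness(60, 0));        /* 1 */
          printf("%d\n", clamp_swappiness(60, __GFP_IO)); /* 60 */
          return 0;
  }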
+ return 0;
+
if (!can_demote(pgdat->node_id, sc) &&
mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
return 0;
@@ -3952,7 +3955,7 @@ static void walk_mm(struct lruvec *lruve
} while (err == -EAGAIN);
- VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
-
- /* see the comment on lru_gen_page */
+ /* see the comment on lru_gen_folio */
gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
-@@ -4472,12 +4473,8 @@ static bool isolate_page(struct lruvec *
+@@ -4472,12 +4473,8 @@ static bool isolate_folio(struct lruvec *
{
bool success;
- /* unmapping inhibited */
-- if (!sc->may_unmap && page_mapped(page))
+- if (!sc->may_unmap && folio_mapped(folio))
- return false;
-
/* swapping inhibited */
- if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
+ if (!(sc->gfp_mask & __GFP_IO) &&
- (PageDirty(page) ||
- (PageAnon(page) && !PageSwapCache(page))))
+ (folio_test_dirty(folio) ||
+ (folio_test_anon(folio) && !folio_test_swapcache(folio))))
return false;
-@@ -4574,9 +4571,8 @@ static int scan_pages(struct lruvec *lru
+@@ -4574,9 +4571,8 @@ static int scan_folios(struct lruvec *lru
__count_vm_events(PGSCAN_ANON + type, isolated);
/*
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+ int swappiness = get_swappiness(lruvec, sc);
+
-+ /* clean file pages are more likely to exist */
++ /* clean file folios are more likely to exist */
+ if (swappiness && !(sc->gfp_mask & __GFP_IO))
+ swappiness = 1;
blk_start_plug(&plug);
- set_mm_walk(lruvec_pgdat(lruvec));
-+ set_mm_walk(NULL, false);
++ set_mm_walk(NULL, sc->proactive);
if (try_to_shrink_lruvec(lruvec, sc))
lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
VM_WARN_ON_ONCE(!global_reclaim(sc));
+ /*
-+ * Unmapped clean pages are already prioritized. Scanning for more of
++ * Unmapped clean folios are already prioritized. Scanning for more of
+ * them is likely futile and can cause high reclaim latency when there
+ * is a large number of memcgs.
+ */
blk_start_plug(&plug);
- set_mm_walk(pgdat);
-+ set_mm_walk(pgdat, false);
++ set_mm_walk(pgdat, sc->proactive);
set_initial_priority(pgdat, sc);
int bin;
int first_bin;
struct lruvec *lruvec;
- struct lru_gen_page *lrugen;
+ struct lru_gen_folio *lrugen;
+ struct mem_cgroup *memcg;
const struct hlist_nulls_node *pos;
- int op = 0;