a7dfb5ffe7960e4faf58cde0c636a4e55ba3cbc9
[openwrt/staging/svanheule.git] /
1 From 397624e12244ec038f51cb1f178ccb7a2ec562e5 Mon Sep 17 00:00:00 2001
2 From: "T.J. Alumbaugh" <talumbau@google.com>
3 Date: Wed, 18 Jan 2023 00:18:23 +0000
4 Subject: [PATCH 14/19] UPSTREAM: mm: multi-gen LRU: section for Bloom filters
5
6 Move Bloom filters code into a dedicated section. Improve the design doc
7 to explain Bloom filter usage and connection between aging and eviction in
8 their use.
9
10 Link: https://lkml.kernel.org/r/20230118001827.1040870-4-talumbau@google.com
11 Change-Id: I73e866f687c1ed9f5c8538086aa39408b79897db
12 Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
13 Cc: Yu Zhao <yuzhao@google.com>
14 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
15 (cherry picked from commit ccbbbb85945d8f0255aa9dbc1b617017e2294f2c)
16 Bug: 274865848
17 Signed-off-by: T.J. Mercier <tjmercier@google.com>
18 ---
19 Documentation/mm/multigen_lru.rst | 16 +++
20 mm/vmscan.c | 180 +++++++++++++++---------------
21 2 files changed, 108 insertions(+), 88 deletions(-)
22
23 --- a/Documentation/mm/multigen_lru.rst
24 +++ b/Documentation/mm/multigen_lru.rst
25 @@ -170,6 +170,22 @@ promotes hot pages. If the scan was done
26 adds the PMD entry pointing to the PTE table to the Bloom filter. This
27 forms a feedback loop between the eviction and the aging.
28
29 +Bloom Filters
30 +-------------
31 +Bloom filters are a space and memory efficient data structure for set
32 +membership test, i.e., test if an element is not in the set or may be
33 +in the set.
34 +
35 +In the eviction path, specifically, in ``lru_gen_look_around()``, if a
36 +PMD has a sufficient number of hot pages, its address is placed in the
37 +filter. In the aging path, set membership means that the PTE range
38 +will be scanned for young pages.
39 +
40 +Note that Bloom filters are probabilistic on set membership. If a test
41 +is false positive, the cost is an additional scan of a range of PTEs,
42 +which may yield hot pages anyway. Parameters of the filter itself can
43 +control the false positive rate in the limit.
44 +
45 Summary
46 -------
47 The multi-gen LRU can be disassembled into the following parts:
48 --- a/mm/vmscan.c
49 +++ b/mm/vmscan.c
50 @@ -3209,6 +3209,98 @@ static bool __maybe_unused seq_is_valid(
51 }
52
53 /******************************************************************************
54 + * Bloom filters
55 + ******************************************************************************/
56 +
57 +/*
58 + * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when
59 + * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
60 + * bits in a bitmap, k is the number of hash functions and n is the number of
61 + * inserted items.
62 + *
63 + * Page table walkers use one of the two filters to reduce their search space.
64 + * To get rid of non-leaf entries that no longer have enough leaf entries, the
65 + * aging uses the double-buffering technique to flip to the other filter each
66 + * time it produces a new generation. For non-leaf entries that have enough
67 + * leaf entries, the aging carries them over to the next generation in
68 + * walk_pmd_range(); the eviction also report them when walking the rmap
69 + * in lru_gen_look_around().
70 + *
71 + * For future optimizations:
72 + * 1. It's not necessary to keep both filters all the time. The spare one can be
73 + * freed after the RCU grace period and reallocated if needed again.
74 + * 2. And when reallocating, it's worth scaling its size according to the number
75 + * of inserted entries in the other filter, to reduce the memory overhead on
76 + * small systems and false positives on large systems.
77 + * 3. Jenkins' hash function is an alternative to Knuth's.
78 + */
79 +#define BLOOM_FILTER_SHIFT 15
80 +
81 +static inline int filter_gen_from_seq(unsigned long seq)
82 +{
83 + return seq % NR_BLOOM_FILTERS;
84 +}
85 +
86 +static void get_item_key(void *item, int *key)
87 +{
88 + u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
89 +
90 + BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
91 +
92 + key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
93 + key[1] = hash >> BLOOM_FILTER_SHIFT;
94 +}
95 +
96 +static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
97 +{
98 + int key[2];
99 + unsigned long *filter;
100 + int gen = filter_gen_from_seq(seq);
101 +
102 + filter = READ_ONCE(lruvec->mm_state.filters[gen]);
103 + if (!filter)
104 + return true;
105 +
106 + get_item_key(item, key);
107 +
108 + return test_bit(key[0], filter) && test_bit(key[1], filter);
109 +}
110 +
111 +static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
112 +{
113 + int key[2];
114 + unsigned long *filter;
115 + int gen = filter_gen_from_seq(seq);
116 +
117 + filter = READ_ONCE(lruvec->mm_state.filters[gen]);
118 + if (!filter)
119 + return;
120 +
121 + get_item_key(item, key);
122 +
123 + if (!test_bit(key[0], filter))
124 + set_bit(key[0], filter);
125 + if (!test_bit(key[1], filter))
126 + set_bit(key[1], filter);
127 +}
128 +
129 +static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
130 +{
131 + unsigned long *filter;
132 + int gen = filter_gen_from_seq(seq);
133 +
134 + filter = lruvec->mm_state.filters[gen];
135 + if (filter) {
136 + bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
137 + return;
138 + }
139 +
140 + filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
141 + __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
142 + WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
143 +}
144 +
145 +/******************************************************************************
146 * mm_struct list
147 ******************************************************************************/
148
149 @@ -3333,94 +3425,6 @@ void lru_gen_migrate_mm(struct mm_struct
150 }
151 #endif
152
153 -/*
154 - * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when
155 - * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
156 - * bits in a bitmap, k is the number of hash functions and n is the number of
157 - * inserted items.
158 - *
159 - * Page table walkers use one of the two filters to reduce their search space.
160 - * To get rid of non-leaf entries that no longer have enough leaf entries, the
161 - * aging uses the double-buffering technique to flip to the other filter each
162 - * time it produces a new generation. For non-leaf entries that have enough
163 - * leaf entries, the aging carries them over to the next generation in
164 - * walk_pmd_range(); the eviction also report them when walking the rmap
165 - * in lru_gen_look_around().
166 - *
167 - * For future optimizations:
168 - * 1. It's not necessary to keep both filters all the time. The spare one can be
169 - * freed after the RCU grace period and reallocated if needed again.
170 - * 2. And when reallocating, it's worth scaling its size according to the number
171 - * of inserted entries in the other filter, to reduce the memory overhead on
172 - * small systems and false positives on large systems.
173 - * 3. Jenkins' hash function is an alternative to Knuth's.
174 - */
175 -#define BLOOM_FILTER_SHIFT 15
176 -
177 -static inline int filter_gen_from_seq(unsigned long seq)
178 -{
179 - return seq % NR_BLOOM_FILTERS;
180 -}
181 -
182 -static void get_item_key(void *item, int *key)
183 -{
184 - u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
185 -
186 - BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
187 -
188 - key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
189 - key[1] = hash >> BLOOM_FILTER_SHIFT;
190 -}
191 -
192 -static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
193 -{
194 - unsigned long *filter;
195 - int gen = filter_gen_from_seq(seq);
196 -
197 - filter = lruvec->mm_state.filters[gen];
198 - if (filter) {
199 - bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
200 - return;
201 - }
202 -
203 - filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
204 - __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
205 - WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
206 -}
207 -
208 -static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
209 -{
210 - int key[2];
211 - unsigned long *filter;
212 - int gen = filter_gen_from_seq(seq);
213 -
214 - filter = READ_ONCE(lruvec->mm_state.filters[gen]);
215 - if (!filter)
216 - return;
217 -
218 - get_item_key(item, key);
219 -
220 - if (!test_bit(key[0], filter))
221 - set_bit(key[0], filter);
222 - if (!test_bit(key[1], filter))
223 - set_bit(key[1], filter);
224 -}
225 -
226 -static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
227 -{
228 - int key[2];
229 - unsigned long *filter;
230 - int gen = filter_gen_from_seq(seq);
231 -
232 - filter = READ_ONCE(lruvec->mm_state.filters[gen]);
233 - if (!filter)
234 - return true;
235 -
236 - get_item_key(item, key);
237 -
238 - return test_bit(key[0], filter) && test_bit(key[1], filter);
239 -}
240 -
241 static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
242 {
243 int i;