return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg)
+static inline void mem_cgroup_protection(struct mem_cgroup *memcg,
+ unsigned long *min, unsigned long *low)
{
- if (mem_cgroup_disabled())
- return 0;
+ if (mem_cgroup_disabled()) {
+ *min = 0;
+ *low = 0;
+ return;
+ }
- return max(READ_ONCE(memcg->memory.emin), READ_ONCE(memcg->memory.elow));
+ *min = READ_ONCE(memcg->memory.emin);
+ *low = READ_ONCE(memcg->memory.elow);
}
enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
{
}
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg)
+static inline void mem_cgroup_protection(struct mem_cgroup *memcg,
+ unsigned long *min, unsigned long *low)
{
- return 0;
+ *min = 0;
+ *low = 0;
}
static inline enum mem_cgroup_protection mem_cgroup_protected(
int file = is_file_lru(lru);
unsigned long lruvec_size;
unsigned long scan;
- unsigned long protection;
+ unsigned long min, low;
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
- protection = mem_cgroup_protection(memcg);
+ mem_cgroup_protection(memcg, &min, &low);
- if (protection > 0) {
+ if (min || low) {
/*
* Scale a cgroup's reclaim pressure by proportioning
* its current usage to its memory.low or memory.min
* set it too low, which is not ideal.
*/
unsigned long cgroup_size = mem_cgroup_size(memcg);
- unsigned long baseline = 0;
/*
- * During the reclaim first pass, we only consider
- * cgroups in excess of their protection setting, but if
- * that doesn't produce free pages, we come back for a
- * second pass where we reclaim from all groups.
+ * If there is any protection in place, we adjust scan
+ * pressure in proportion to how much a group's current
+ * usage exceeds that, in percent.
*
- * To maintain fairness in both cases, the first pass
- * targets groups in proportion to their overage, and
- * the second pass targets groups in proportion to their
- * protection utilization.
- *
- * So on the first pass, a group whose size is 130% of
- * its protection will be targeted at 30% of its size.
- * On the second pass, a group whose size is at 40% of
- * its protection will be
- * targeted at 40% of its size.
+ * There is one special case: in the first reclaim pass,
+ * we skip over all groups that are within their low
+ * protection. If that fails to reclaim enough pages to
+ * satisfy the reclaim goal, we come back and override
+ * the best-effort low protection. However, we still
+ * ideally want to honor how well-behaved groups are in
+ * that case instead of simply punishing them all
+ * equally. As such, we reclaim them based on how much
+ * of their best-effort protection they are using. Usage
+ * below memory.min is excluded from consideration when
+ * calculating utilisation, as it isn't ever
+ * reclaimable, so it might as well not exist for our
+ * purposes.
*/
- if (!sc->memcg_low_reclaim)
- baseline = lruvec_size;
- scan = lruvec_size * cgroup_size / protection - baseline;
+ if (sc->memcg_low_reclaim && low > min) {
+ /*
+ * Reclaim according to utilisation between min
+ * and low
+ */
+ scan = lruvec_size * (cgroup_size - min) /
+ (low - min);
+ } else {
+ /* Reclaim according to protection overage */
+ scan = lruvec_size * cgroup_size /
+ max(min, low) - lruvec_size;
+ }
/*
* Don't allow the scan target to exceed the lruvec
* some cases in the case of large overages.
*
* Also, minimally target SWAP_CLUSTER_MAX pages to keep
- * reclaim moving forwards.
+ * reclaim moving forwards, avoiding decrementing
+ * sc->priority further than desirable.
*/
scan = clamp(scan, SWAP_CLUSTER_MAX, lruvec_size);
} else {