#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
-#define NUMAINFO_EVENTS_TARGET 1024
/*
* Cgroups above their limits are maintained in a RB-Tree, independent of
case MEM_CGROUP_TARGET_SOFTLIMIT:
next = val + SOFTLIMIT_EVENTS_TARGET;
break;
- case MEM_CGROUP_TARGET_NUMAINFO:
- next = val + NUMAINFO_EVENTS_TARGET;
- break;
default:
break;
}
if (unlikely(mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_THRESH))) {
bool do_softlimit;
- bool do_numainfo __maybe_unused;
do_softlimit = mem_cgroup_event_ratelimit(memcg,
MEM_CGROUP_TARGET_SOFTLIMIT);
-#if MAX_NUMNODES > 1
- do_numainfo = mem_cgroup_event_ratelimit(memcg,
- MEM_CGROUP_TARGET_NUMAINFO);
-#endif
mem_cgroup_threshold(memcg);
if (unlikely(do_softlimit))
mem_cgroup_update_tree(memcg, page);
-#if MAX_NUMNODES > 1
- if (unlikely(do_numainfo))
- atomic_inc(&memcg->numainfo_events);
-#endif
}
}
return ret;
}
-#if MAX_NUMNODES > 1
-
-/**
- * test_mem_cgroup_node_reclaimable
- * @memcg: the target memcg
- * @nid: the node ID to be checked.
- * @noswap : specify true here if the user wants flle only information.
- *
- * This function returns whether the specified memcg contains any
- * reclaimable pages on a node. Returns true if there are any reclaimable
- * pages in the node.
- */
-static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
- int nid, bool noswap)
-{
- struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
-
- if (lruvec_page_state(lruvec, NR_INACTIVE_FILE) ||
- lruvec_page_state(lruvec, NR_ACTIVE_FILE))
- return true;
- if (noswap || !total_swap_pages)
- return false;
- if (lruvec_page_state(lruvec, NR_INACTIVE_ANON) ||
- lruvec_page_state(lruvec, NR_ACTIVE_ANON))
- return true;
- return false;
-
-}
-
-/*
- * Always updating the nodemask is not very good - even if we have an empty
- * list or the wrong list here, we can start from some node and traverse all
- * nodes based on the zonelist. So update the list loosely once per 10 secs.
- *
- */
-static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
-{
- int nid;
- /*
- * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
- * pagein/pageout changes since the last update.
- */
- if (!atomic_read(&memcg->numainfo_events))
- return;
- if (atomic_inc_return(&memcg->numainfo_updating) > 1)
- return;
-
- /* make a nodemask where this memcg uses memory from */
- memcg->scan_nodes = node_states[N_MEMORY];
-
- for_each_node_mask(nid, node_states[N_MEMORY]) {
-
- if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
- node_clear(nid, memcg->scan_nodes);
- }
-
- atomic_set(&memcg->numainfo_events, 0);
- atomic_set(&memcg->numainfo_updating, 0);
-}
-
-/*
- * Selecting a node where we start reclaim from. Because what we need is just
- * reducing usage counter, start from anywhere is O,K. Considering
- * memory reclaim from current node, there are pros. and cons.
- *
- * Freeing memory from current node means freeing memory from a node which
- * we'll use or we've used. So, it may make LRU bad. And if several threads
- * hit limits, it will see a contention on a node. But freeing from remote
- * node means more costs for memory reclaim because of memory latency.
- *
- * Now, we use round-robin. Better algorithm is welcomed.
- */
-int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
-{
- int node;
-
- mem_cgroup_may_update_nodemask(memcg);
- node = memcg->last_scanned_node;
-
- node = next_node_in(node, memcg->scan_nodes);
- /*
- * mem_cgroup_may_update_nodemask might have seen no reclaimmable pages
- * last time it really checked all the LRUs due to rate limiting.
- * Fallback to the current node in that case for simplicity.
- */
- if (unlikely(node == MAX_NUMNODES))
- node = numa_node_id();
-
- memcg->last_scanned_node = node;
- return node;
-}
-#else
-int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
-{
- return 0;
-}
-#endif
-
static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
pg_data_t *pgdat,
gfp_t gfp_mask,
goto fail;
INIT_WORK(&memcg->high_work, high_work_func);
- memcg->last_scanned_node = MAX_NUMNODES;
INIT_LIST_HEAD(&memcg->oom_notify);
mutex_init(&memcg->thresholds_lock);
spin_lock_init(&memcg->move_lock);
gfp_t gfp_mask,
bool may_swap)
{
- struct zonelist *zonelist;
unsigned long nr_reclaimed;
unsigned long pflags;
- int nid;
unsigned int noreclaim_flag;
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
.may_unmap = 1,
.may_swap = may_swap,
};
-
- set_task_reclaim_state(current, &sc.reclaim_state);
/*
- * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
- * take care of from where we get pages. So the node where we start the
- * scan does not need to be the current node.
+ * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
+ * equal pressure on all the nodes. This is based on the assumption that
+ * the reclaim does not bail out early.
*/
- nid = mem_cgroup_select_victim_node(memcg);
+ struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
- zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
+ set_task_reclaim_state(current, &sc.reclaim_state);
trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask);