/* kswapd must be awake if processes are being throttled */
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
- pgdat->classzone_idx = min(pgdat->classzone_idx,
+ pgdat->kswapd_classzone_idx = min(pgdat->kswapd_classzone_idx,
(enum zone_type)ZONE_NORMAL);
wake_up_interruptible(&pgdat->kswapd_wait);
}
if (!populated_zone(zone))
continue;
- if (zone_balanced(zone, order, classzone_idx))
- return true;
+ if (!zone_balanced(zone, order, classzone_idx))
+ return false;
}
- return false;
+ return true;
}
/*
return sc.order;
}
-static void kswapd_try_to_sleep(pg_data_t *pgdat, int order,
- int classzone_idx, int balanced_classzone_idx)
+static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
+ unsigned int classzone_idx)
{
long remaining = 0;
DEFINE_WAIT(wait);
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
/* Try to sleep for a short interval */
- if (prepare_kswapd_sleep(pgdat, order, remaining,
- balanced_classzone_idx)) {
+ if (prepare_kswapd_sleep(pgdat, reclaim_order, remaining, classzone_idx)) {
/*
* Compaction records what page blocks it recently failed to
* isolate pages from and skips them in the future scanning.
* We have freed the memory, now we should compact it to make
* allocation of the requested order possible.
*/
- wakeup_kcompactd(pgdat, order, classzone_idx);
+ wakeup_kcompactd(pgdat, alloc_order, classzone_idx);
remaining = schedule_timeout(HZ/10);
+
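+ /*
+ * If woken prematurely then restore kswapd_classzone_idx and
+ * kswapd_order instead of dropping this request: each becomes
+ * the max of its current value (set by any wakeup request that
+ * arrived in the meantime) and the value from the request that
+ * was about to sleep.
+ */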
+ if (remaining) {
+ pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx);
+ pgdat->kswapd_order = max(pgdat->kswapd_order, reclaim_order);
+ }
+
finish_wait(&pgdat->kswapd_wait, &wait);
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
}
* After a short sleep, check if it was a premature sleep. If not, then
* go fully to sleep until explicitly woken up.
*/
- if (prepare_kswapd_sleep(pgdat, order, remaining,
- balanced_classzone_idx)) {
+ if (prepare_kswapd_sleep(pgdat, reclaim_order, remaining, classzone_idx)) {
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
/*
*/
static int kswapd(void *p)
{
- unsigned long order, new_order;
- int classzone_idx, new_classzone_idx;
- int balanced_classzone_idx;
+ unsigned int alloc_order, reclaim_order, classzone_idx;
pg_data_t *pgdat = (pg_data_t*)p;
struct task_struct *tsk = current;
tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
set_freezable();
- order = new_order = 0;
- classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
- balanced_classzone_idx = classzone_idx;
+ pgdat->kswapd_order = alloc_order = reclaim_order = 0;
+ pgdat->kswapd_classzone_idx = classzone_idx = 0;
for ( ; ; ) {
bool ret;
- /*
- * While we were reclaiming, there might have been another
- * wakeup, so check the values.
- */
- new_order = pgdat->kswapd_max_order;
- new_classzone_idx = pgdat->classzone_idx;
- pgdat->kswapd_max_order = 0;
- pgdat->classzone_idx = pgdat->nr_zones - 1;
+kswapd_try_sleep:
+ kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order,
+ classzone_idx);
- if (order < new_order || classzone_idx > new_classzone_idx) {
- /*
- * Don't sleep if someone wants a larger 'order'
- * allocation or has tigher zone constraints
- */
- order = new_order;
- classzone_idx = new_classzone_idx;
- } else {
- kswapd_try_to_sleep(pgdat, order, classzone_idx,
- balanced_classzone_idx);
- order = pgdat->kswapd_max_order;
- classzone_idx = pgdat->classzone_idx;
- new_order = order;
- new_classzone_idx = classzone_idx;
- pgdat->kswapd_max_order = 0;
- pgdat->classzone_idx = pgdat->nr_zones - 1;
- }
+ /* Read the new order and classzone_idx */
+ alloc_order = reclaim_order = pgdat->kswapd_order;
+ classzone_idx = pgdat->kswapd_classzone_idx;
+ pgdat->kswapd_order = 0;
+ pgdat->kswapd_classzone_idx = 0;
ret = try_to_freeze();
if (kthread_should_stop())
* We can speed up thawing tasks if we don't call balance_pgdat
* after returning from the refrigerator
*/
- if (!ret) {
- trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
+ if (ret)
+ continue;
+
+ /*
+ * Reclaim begins at the requested order but if a high-order
+ * reclaim fails then kswapd falls back to reclaiming for
+ * order-0. If that happens, kswapd will consider sleeping
+ * for the order it finished reclaiming at (reclaim_order)
+ * but kcompactd is woken to compact for the original
+ * request (alloc_order).
+ */
+ trace_mm_vmscan_kswapd_wake(pgdat->node_id, alloc_order);
+ reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
+ if (reclaim_order < alloc_order)
+ goto kswapd_try_sleep;
- /* return value ignored until next patch */
- balance_pgdat(pgdat, order, classzone_idx);
- }
+ alloc_order = reclaim_order = pgdat->kswapd_order;
+ classzone_idx = pgdat->kswapd_classzone_idx;
}
tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
return;
pgdat = zone->zone_pgdat;
- if (pgdat->kswapd_max_order < order) {
- pgdat->kswapd_max_order = order;
- pgdat->classzone_idx = min(pgdat->classzone_idx, classzone_idx);
- }
+ pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx, classzone_idx);
+ pgdat->kswapd_order = max(pgdat->kswapd_order, order);
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
if (zone_balanced(zone, order, 0))