typedef struct page *new_page_t(struct page *, unsigned long private, int **);
+/*
+ * Return values from addresss_space_operations.migratepage():
+ * - negative errno on page migration failure;
+ * - zero on page migration success;
+ *
+ * The balloon page migration introduces this special case where a 'distinct'
+ * return code is used to flag a successful page migration to unmap_and_move().
+ * This approach is necessary because page migration can race against balloon
+ * deflation procedure, and for such case we could introduce a nasty page leak
+ * if a successfully migrated balloon page gets released concurrently with
+ * migration's unmap_and_move() wrap-up steps.
+ */
+#define MIGRATEPAGE_SUCCESS 0
+#define MIGRATEPAGE_BALLOON_SUCCESS 1 /* special ret code for balloon page
+ * sucessful migration case.
+ */
+ enum migrate_reason {
+ MR_COMPACTION,
+ MR_MEMORY_FAILURE,
+ MR_MEMORY_HOTPLUG,
+ MR_SYSCALL, /* also applies to cpusets */
+ MR_MEMPOLICY_MBIND,
+ MR_NUMA_MISPLACED,
+ MR_CMA
+ };
#ifdef CONFIG_MIGRATION
#else
static inline void putback_lru_pages(struct list_head *l) {}
+static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
- enum migrate_mode mode) { return -ENOSYS; }
+ enum migrate_mode mode, int reason) { return -ENOSYS; }
static inline int migrate_huge_page(struct page *page, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode) { return -ENOSYS; }
config HAVE_UNSTABLE_SCHED_CLOCK
bool
- default y
+ #
+ # For architectures that want to enable the support for NUMA-affine scheduler
+ # balancing logic:
+ #
+ config ARCH_SUPPORTS_NUMA_BALANCING
+ bool
+
+ # For architectures that (ab)use NUMA to represent different memory regions
+ # all cpu-local but of different latencies, such as SuperH.
+ #
+ config ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ bool
+
+ #
+ # For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE
+ config ARCH_WANTS_PROT_NUMA_PROT_NONE
+ bool
+
+ config ARCH_USES_NUMA_PROT_NONE
+ bool
+ default y
+ depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
+ depends on NUMA_BALANCING
+
+ config NUMA_BALANCING_DEFAULT_ENABLED
+ bool "Automatically enable NUMA aware memory/task placement"
+ default y
+ depends on NUMA_BALANCING
+ help
+ If set, autonumic NUMA balancing will be enabled if running on a NUMA
+ machine.
+
+ config NUMA_BALANCING
+ bool "Memory placement aware NUMA scheduler"
+ depends on ARCH_SUPPORTS_NUMA_BALANCING
+ depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ depends on SMP && NUMA && MIGRATION
+ help
+ This option adds support for automatic NUMA aware memory/task placement.
+ The mechanism is quite primitive and is based on migrating memory when
+ it is references to the node the task is running on.
+
+ This system will be inactive on UMA systems.
+
menuconfig CGROUPS
boolean "Control Group support"
depends on EVENTFD
entity_tick(cfs_rq, se, queued);
}
+ if (sched_feat_numa(NUMA))
+ task_tick_numa(rq, curr);
++
+ update_rq_runnable_avg(rq, 1);
}
/*
#include <linux/freezer.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
+ #include <linux/migrate.h>
+
#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
if (__pmd_trans_huge_lock(pmd, vma) == 1) {
pmd_t entry;
entry = pmdp_get_and_clear(mm, addr, pmd);
- entry = pmd_modify(entry, newprot);
+ if (!prot_numa)
+ entry = pmd_modify(entry, newprot);
+ else {
+ struct page *page = pmd_page(*pmd);
+
+ /* only check non-shared pages */
+ if (page_mapcount(page) == 1 &&
+ !pmd_numa(*pmd)) {
+ entry = pmd_mknuma(entry);
+ }
+ }
+ BUG_ON(pmd_write(entry));
set_pmd_at(mm, addr, pmd, entry);
spin_unlock(&vma->vm_mm->page_table_lock);
ret = 1;
struct anon_vma *anon_vma;
int ret = 1;
+ BUG_ON(is_huge_zero_pfn(page_to_pfn(page)));
BUG_ON(!PageAnon(page));
- anon_vma = page_lock_anon_vma(page);
+ anon_vma = page_lock_anon_vma_read(page);
if (!anon_vma)
goto out;
ret = 0;
hend = vma->vm_end & HPAGE_PMD_MASK;
if (address < hstart || address + HPAGE_PMD_SIZE > hend)
goto out;
-
- if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
- (vma->vm_flags & VM_NOHUGEPAGE))
- goto out;
-
- if (!vma->anon_vma || vma->vm_ops)
- goto out;
- if (is_vma_temporary_stack(vma))
+ if (!hugepage_vma_check(vma))
goto out;
- VM_BUG_ON(vma->vm_flags & VM_NO_THP);
-
- pgd = pgd_offset(mm, address);
- if (!pgd_present(*pgd))
- goto out;
-
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
+ pmd = mm_find_pmd(mm, address);
+ if (!pmd)
goto out;
-
- pmd = pmd_offset(pud, address);
- /* pmd can't go away or become huge under us */
- if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
+ if (pmd_trans_huge(*pmd))
goto out;
- anon_vma_lock(vma->anon_vma);
+ anon_vma_lock_write(vma->anon_vma);
pte = pte_offset_map(pmd, address);
ptl = pte_lockptr(mm, pmd);
page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
goto out;
}
+ if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+ goto no_page_table;
if (pmd_trans_huge(*pmd)) {
if (flags & FOLL_SPLIT) {
- split_huge_page_pmd(mm, pmd);
+ split_huge_page_pmd(vma, address, pmd);
goto split_fallthrough;
}
spin_lock(&mm->page_table_lock);
barrier();
if (pmd_trans_huge(orig_pmd)) {
- if (pmd_numa(*pmd))
+ unsigned int dirty = flags & FAULT_FLAG_WRITE;
+
- if (dirty && !pmd_write(orig_pmd) &&
- !pmd_trans_splitting(orig_pmd)) {
++ if (pmd_numa(orig_pmd))
+ return do_huge_pmd_numa_page(mm, vma, address,
+ orig_pmd, pmd);
+
- if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {
++ if (dirty && !pmd_write(orig_pmd)) {
ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
orig_pmd);
/*
if (unlikely(ret & VM_FAULT_OOM))
goto retry;
return ret;
+ } else {
+ huge_pmd_set_accessed(mm, vma, address, pmd,
+ orig_pmd, dirty);
}
+
return 0;
}
}
case -EAGAIN:
retry++;
break;
- case 0:
+ case MIGRATEPAGE_SUCCESS:
+ nr_succeeded++;
break;
default:
/* Permanent failure */
}
}
}
- rc = 0;
+ rc = nr_failed + retry;
out:
+ if (nr_succeeded)
+ count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
+ if (nr_failed)
+ count_vm_events(PGMIGRATE_FAIL, nr_failed);
+ trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
+
if (!swapwrite)
current->flags &= ~PF_SWAPWRITE;
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE)
- split_huge_page_pmd(vma->vm_mm, pmd);
+ split_huge_page_pmd(vma, addr, pmd);
- else if (change_huge_pmd(vma, pmd, addr, newprot))
+ else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
+ pages += HPAGE_PMD_NR;
continue;
+ }
/* fall through */
}
if (pmd_none_or_clear_bad(pmd))
ret = migrate_pages(&cc->migratepages,
alloc_migrate_target,
- 0, false, MIGRATE_SYNC);
+ 0, false, MIGRATE_SYNC,
+ MR_CMA);
}
- putback_lru_pages(&cc->migratepages);
+ putback_movable_pages(&cc->migratepages);
return ret > 0 ? 0 : ret;
}