return pgtable;
}
-void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
- VM_BUG_ON(pmd_devmap(*pmdp));
-
- /*
- * We can't mark the pmd none here, because that will cause a race
- * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
- * we spilt, but at the same time we wan't rest of the ppc64 code
- * not to insert hash pte on this, because we will be modifying
- * the deposited pgtable in the caller of this function. Hence
- * clear the _PAGE_USER so that we move the fault handling to
- * higher level function and that will serialize against ptl.
- * We need to flush existing hash pte entries here even though,
- * the translation is still valid, because we will withdraw
- * pgtable_t after this.
- */
- pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
-}
-
/*
* A linux hugepage PMD was changed and the corresponding hash table entries
* neesd to be flushed.
struct mm_struct *mm = vma->vm_mm;
struct page *page;
pgtable_t pgtable;
- pmd_t old, _pmd;
+ pmd_t old_pmd, _pmd;
bool young, write, soft_dirty, pmd_migration = false;
unsigned long addr;
int i;
return __split_huge_zero_page_pmd(vma, haddr, pmd);
}
+ /*
+ * Up to this point the pmd is present and huge and userland has the
+ * whole access to the hugepage during the split (which happens in
+ * place). If we overwrite the pmd with the not-huge version pointing
+ * to the pte here (which of course we could if all CPUs were bug
+ * free), userland could trigger a small page size TLB miss on the
+ * small sized TLB while the hugepage TLB entry is still established in
+ * the huge TLB. Some CPU doesn't like that.
+ * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum
+ * 383 on page 93. Intel should be safe but is also warns that it's
+ * only safe if the permission and cache attributes of the two entries
+ * loaded in the two TLB is identical (which should be the case here).
+ * But it is generally safer to never allow small and huge TLB entries
+ * for the same virtual address to be loaded simultaneously. So instead
+ * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
+ * current pmd notpresent (atomically because here the pmd_trans_huge
+ * must remain set at all times on the pmd until the split is complete
+ * for this pmd), then we flush the SMP TLB and finally we write the
+ * non-huge version of the pmd entry with pmd_populate.
+ */
+ old_pmd = pmdp_invalidate(vma, haddr, pmd);
+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
- pmd_migration = is_pmd_migration_entry(*pmd);
+ pmd_migration = is_pmd_migration_entry(old_pmd);
if (pmd_migration) {
swp_entry_t entry;
- entry = pmd_to_swp_entry(*pmd);
+ entry = pmd_to_swp_entry(old_pmd);
page = pfn_to_page(swp_offset(entry));
} else
#endif
- page = pmd_page(*pmd);
+ page = pmd_page(old_pmd);
VM_BUG_ON_PAGE(!page_count(page), page);
page_ref_add(page, HPAGE_PMD_NR - 1);
- write = pmd_write(*pmd);
- young = pmd_young(*pmd);
- soft_dirty = pmd_soft_dirty(*pmd);
+ if (pmd_dirty(old_pmd))
+ SetPageDirty(page);
+ write = pmd_write(old_pmd);
+ young = pmd_young(old_pmd);
+ soft_dirty = pmd_soft_dirty(old_pmd);
- pmdp_huge_split_prepare(vma, haddr, pmd);
+ /*
+ * Withdraw the table only after we mark the pmd entry invalid.
+ * This's critical for some architectures (Power).
+ */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);
}
smp_wmb(); /* make pte visible before pmd */
- /*
- * Up to this point the pmd is present and huge and userland has the
- * whole access to the hugepage during the split (which happens in
- * place). If we overwrite the pmd with the not-huge version pointing
- * to the pte here (which of course we could if all CPUs were bug
- * free), userland could trigger a small page size TLB miss on the
- * small sized TLB while the hugepage TLB entry is still established in
- * the huge TLB. Some CPU doesn't like that.
- * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum
- * 383 on page 93. Intel should be safe but is also warns that it's
- * only safe if the permission and cache attributes of the two entries
- * loaded in the two TLB is identical (which should be the case here).
- * But it is generally safer to never allow small and huge TLB entries
- * for the same virtual address to be loaded simultaneously. So instead
- * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
- * current pmd notpresent (atomically because here the pmd_trans_huge
- * must remain set at all times on the pmd until the split is complete
- * for this pmd), then we flush the SMP TLB and finally we write the
- * non-huge version of the pmd entry with pmd_populate.
- */
- old = pmdp_invalidate(vma, haddr, pmd);
-
- /*
- * Transfer dirty bit using value returned by pmd_invalidate() to be
- * sure we don't race with CPU that can set the bit under us.
- */
- if (pmd_dirty(old))
- SetPageDirty(page);
-
pmd_populate(mm, pmd, pgtable);
if (freeze) {