mm, numa: fix bad pmd by atomically check for pmd_trans_huge when marking page tables...

author Mel Gorman <mgorman@techsingularity.net>

Fri, 6 Mar 2020 06:28:26 +0000 (22:28 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 6 Mar 2020 13:06:09 +0000 (07:06 -0600)
author Mel Gorman <mgorman@techsingularity.net>
Fri, 6 Mar 2020 06:28:26 +0000 (22:28 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Mar 2020 13:06:09 +0000 (07:06 -0600)
diff --git a/mm/mprotect.c b/mm/mprotect.c

index 7a8e84f86831b4e4f2a8cdf535bf48fb9990979e..311c0dadf71c9f5eb51b4d2004028fe0fe9cc4d8 100644 (file)
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -161,6 +161,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
         return pages;
  }
  
+/*
+ * Used when setting automatic NUMA hinting protection where it is
+ * critical that a numa hinting PMD is not confused with a bad PMD.
+ *
+ * The PMD is read once with pmd_read_atomic() and every check below is
+ * made against that local copy (pmdval) rather than re-reading *pmd.
+ *
+ * Returns 1 if the copied value is none, or if it is bad, in which case
+ * pmd_clear_bad() is called on the entry first. Returns 0 if
+ * pmd_trans_huge() is true for the copied value or if none of the checks
+ * match. change_pmd_range() skips the entry on a non-zero return.
+ */
+static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
+{
+       pmd_t pmdval = pmd_read_atomic(pmd);
+
+       /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       barrier();
+#endif
+
+       if (pmd_none(pmdval))
+               return 1;
+       if (pmd_trans_huge(pmdval))     /* tested before pmd_bad() so it is never cleared as bad */
+               return 0;
+       if (unlikely(pmd_bad(pmdval))) {
+               pmd_clear_bad(pmd);
+               return 1;
+       }
+
+       return 0;
+}
+
  static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                 pud_t *pud, unsigned long addr, unsigned long end,
                 pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -178,8 +203,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                 unsigned long this_pages;
  
                 next = pmd_addr_end(addr, end);
-               if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
-                               && pmd_none_or_clear_bad(pmd))
+
+               /*
+                * Automatic NUMA balancing walks the tables with mmap_sem
+                * held for read. It's possible for a parallel update to occur
+                * between pmd_trans_huge() and a pmd_none_or_clear_bad()
+                * check, leading to a false positive and clearing.
+                * Hence, it's necessary to atomically read the PMD value
+                * for all the checks.
+                */
+               if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
+                    pmd_none_or_clear_bad_unless_trans_huge(pmd))
                         goto next;
  
                 /* invoke the mmu notifier if the pmd is populated */
author	Mel Gorman <mgorman@techsingularity.net>
	Fri, 6 Mar 2020 06:28:26 +0000 (22:28 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 6 Mar 2020 13:06:09 +0000 (07:06 -0600)