KVM: arm/arm64: Fix handling of stage2 huge mappings

author Suzuki K Poulose <suzuki.poulose@arm.com>

Wed, 20 Mar 2019 14:57:19 +0000 (14:57 +0000)

committer Marc Zyngier <marc.zyngier@arm.com>

Wed, 20 Mar 2019 17:29:55 +0000 (17:29 +0000)
author Suzuki K Poulose <suzuki.poulose@arm.com>
Wed, 20 Mar 2019 14:57:19 +0000 (14:57 +0000)
committer Marc Zyngier <marc.zyngier@arm.com>
Wed, 20 Mar 2019 17:29:55 +0000 (17:29 +0000)
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h

index de2089501b8b5705a29bcb80b7007d630cfabc60..9e11dce55e06f4e7359b7b779cc7814ae752c813 100644 (file)
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -75,6 +75,8 @@ static inline bool kvm_stage2_has_pud(struct kvm *kvm)
  
  #define S2_PMD_MASK                            PMD_MASK
  #define S2_PMD_SIZE                            PMD_SIZE
+#define S2_PUD_MASK                            PUD_MASK
+#define S2_PUD_SIZE                            PUD_SIZE
  
  static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
  {
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c

index bcdf978c0d1d27adadfbf8e598a05e50a3530c8f..f9da2fad9bd6b403e32e2f95324fe21d9e7309b0 100644 (file)
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1067,25 +1067,43 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
  {
         pmd_t *pmd, old_pmd;
  
+retry:
         pmd = stage2_get_pmd(kvm, cache, addr);
         VM_BUG_ON(!pmd);
  
         old_pmd = *pmd;
+       /*
+        * Multiple vcpus faulting on the same PMD entry, can
+        * lead to them sequentially updating the PMD with the
+        * same value. Following the break-before-make
+        * (pmd_clear() followed by tlb_flush()) process can
+        * hinder forward progress due to refaults generated
+        * on missing translations.
+        *
+        * Skip updating the page table if the entry is
+        * unchanged.
+        */
+       if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+               return 0;
+
         if (pmd_present(old_pmd)) {
                 /*
-                * Multiple vcpus faulting on the same PMD entry, can
-                * lead to them sequentially updating the PMD with the
-                * same value. Following the break-before-make
-                * (pmd_clear() followed by tlb_flush()) process can
-                * hinder forward progress due to refaults generated
-                * on missing translations.
+                * If we already have PTE level mapping for this block,
+                * we must unmap it to avoid inconsistent TLB state and
+                * leaking the table page. We could end up in this situation
+                * if the memory slot was marked for dirty logging and was
+                * reverted, leaving PTE level mappings for the pages accessed
+                * during the period. So, unmap the PTE level mapping for this
+                * block and retry, as we could have released the upper level
+                * table in the process.
                  *
-                * Skip updating the page table if the entry is
-                * unchanged.
+                * Normal THP split/merge follows mmu_notifier callbacks and gets
+                * handled accordingly.
                  */
-               if (pmd_val(old_pmd) == pmd_val(*new_pmd))
-                       return 0;
-
+               if (!pmd_thp_or_huge(old_pmd)) {
+                       unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE);
+                       goto retry;
+               }
                 /*
                  * Mapping in huge pages should only happen through a
                  * fault.  If a page is merged into a transparent huge
@@ -1097,8 +1115,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
                  * should become splitting first, unmapped, merged,
                  * and mapped back in on-demand.
                  */
-               VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
-
+               WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
                 pmd_clear(pmd);
                 kvm_tlb_flush_vmid_ipa(kvm, addr);
         } else {
@@ -1114,6 +1131,7 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac
  {
         pud_t *pudp, old_pud;
  
+retry:
         pudp = stage2_get_pud(kvm, cache, addr);
         VM_BUG_ON(!pudp);
  
@@ -1121,14 +1139,23 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac
  
         /*
          * A large number of vcpus faulting on the same stage 2 entry,
-        * can lead to a refault due to the
-        * stage2_pud_clear()/tlb_flush(). Skip updating the page
-        * tables if there is no change.
+        * can lead to a refault due to the stage2_pud_clear()/tlb_flush().
+        * Skip updating the page tables if there is no change.
          */
         if (pud_val(old_pud) == pud_val(*new_pudp))
                 return 0;
  
         if (stage2_pud_present(kvm, old_pud)) {
+               /*
+                * If we already have table level mapping for this block, unmap
+                * the range for this block and retry.
+                */
+               if (!stage2_pud_huge(kvm, old_pud)) {
+                       unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE);
+                       goto retry;
+               }
+
+               WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp));
                 stage2_pud_clear(kvm, pudp);
                 kvm_tlb_flush_vmid_ipa(kvm, addr);
         } else {
author	Suzuki K Poulose <suzuki.poulose@arm.com>
	Wed, 20 Mar 2019 14:57:19 +0000 (14:57 +0000)
committer	Marc Zyngier <marc.zyngier@arm.com>
	Wed, 20 Mar 2019 17:29:55 +0000 (17:29 +0000)
arch/arm/include/asm/stage2_pgtable.h		patch | blob | history
virt/kvm/arm/mmu.c		patch | blob | history