arm64: KVM: flush VM pages before letting the guest enable caches
author    Marc Zyngier <marc.zyngier@arm.com>
          Wed, 15 Jan 2014 12:50:23 +0000 (12:50 +0000)
committer Marc Zyngier <marc.zyngier@arm.com>
          Mon, 3 Mar 2014 01:15:22 +0000 (01:15 +0000)
When the guest runs with caches disabled (in an early boot
sequence, for example), all writes go directly to RAM,
bypassing the caches altogether.

Once the MMU and caches are enabled, whatever sits in the cache
suddenly becomes visible, which isn't what the guest expects.

A way to avoid this potential disaster is to invalidate the cache
when the MMU is being turned on. For this, we hook into the SCTLR_EL1
trapping code and scan the stage-2 page tables, invalidating the
pages/sections that have already been mapped in.
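
For illustration only, the condition that gates the flush boils down to
a trapped SCTLR_EL1 write leaving both the MMU (M, bit 0) and the data
cache (C, bit 2) set. The standalone sketch below assumes nothing beyond
those architectural bit positions; caches_enabled() is a made-up
stand-in for the in-tree vcpu_has_cache_enabled() check visible in the
diff, not the actual kernel helper:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SCTLR_EL1_M	(1UL << 0)	/* stage-1 MMU enable */
#define SCTLR_EL1_C	(1UL << 2)	/* data/unified cache enable */

/* Hypothetical stand-in for vcpu_has_cache_enabled(): true once both
 * the MMU and the data cache are enabled in the guest's SCTLR_EL1. */
static bool caches_enabled(uint64_t sctlr)
{
	return (sctlr & (SCTLR_EL1_M | SCTLR_EL1_C)) ==
	       (SCTLR_EL1_M | SCTLR_EL1_C);
}

int main(void)
{
	/* Early boot: MMU and caches still off, no flush needed. */
	printf("SCTLR_EL1=0x0 -> flush? %d\n", caches_enabled(0x0));
	/* Guest enables MMU + caches: this is the point where the
	 * trap handler would call stage2_flush_vm(). */
	printf("SCTLR_EL1=0x5 -> flush? %d\n", caches_enabled(0x5));
	return 0;
}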

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
arch/arm/include/asm/kvm_mmu.h
arch/arm/kvm/mmu.c
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/kvm/sys_regs.c

index 891afe78311a281c758e60cde6857a8fc05b8ada..eb85b81eea6f10a8a66b93f04964557478e994a1 100644
@@ -155,6 +155,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 #define kvm_flush_dcache_to_poc(a,l)   __cpuc_flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)            virt_to_idmap((unsigned long)(x))
 
+void stage2_flush_vm(struct kvm *kvm);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
index c1c08b240f35274068576a3c50e795569d8ed54a..d7e998c6a08fe9f4a2d060c39c6c2f34953489d7 100644
@@ -187,6 +187,99 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
        }
 }
 
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+                             phys_addr_t addr, phys_addr_t end)
+{
+       pte_t *pte;
+
+       pte = pte_offset_kernel(pmd, addr);
+       do {
+               if (!pte_none(*pte)) {
+                       hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                       kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
+               }
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+                             phys_addr_t addr, phys_addr_t end)
+{
+       pmd_t *pmd;
+       phys_addr_t next;
+
+       pmd = pmd_offset(pud, addr);
+       do {
+               next = kvm_pmd_addr_end(addr, end);
+               if (!pmd_none(*pmd)) {
+                       if (kvm_pmd_huge(*pmd)) {
+                               hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                               kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+                       } else {
+                               stage2_flush_ptes(kvm, pmd, addr, next);
+                       }
+               }
+       } while (pmd++, addr = next, addr != end);
+}
+
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+                             phys_addr_t addr, phys_addr_t end)
+{
+       pud_t *pud;
+       phys_addr_t next;
+
+       pud = pud_offset(pgd, addr);
+       do {
+               next = kvm_pud_addr_end(addr, end);
+               if (!pud_none(*pud)) {
+                       if (pud_huge(*pud)) {
+                               hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+                               kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+                       } else {
+                               stage2_flush_pmds(kvm, pud, addr, next);
+                       }
+               }
+       } while (pud++, addr = next, addr != end);
+}
+
+static void stage2_flush_memslot(struct kvm *kvm,
+                                struct kvm_memory_slot *memslot)
+{
+       phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+       phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+       phys_addr_t next;
+       pgd_t *pgd;
+
+       pgd = kvm->arch.pgd + pgd_index(addr);
+       do {
+               next = kvm_pgd_addr_end(addr, end);
+               stage2_flush_puds(kvm, pgd, addr, next);
+       } while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+void stage2_flush_vm(struct kvm *kvm)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots)
+               stage2_flush_memslot(kvm, memslot);
+
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * free_boot_hyp_pgd - free HYP boot page tables
  *
index 00c0cc8b8045a17ef72991994f039914c16a3b1e..7d29847a893b89b4edf6460599d0d9e9ff720127 100644
@@ -150,5 +150,7 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 
 #define kvm_virt_to_phys(x)            __virt_to_phys((unsigned long)(x))
 
+void stage2_flush_vm(struct kvm *kvm);
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
index 2097e5ecba42a6342537f447f67599b1824a5be7..03244582bc555af4b303fca4370890e278114d16 100644
@@ -27,6 +27,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <trace/events/kvm.h>
@@ -154,8 +155,10 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
 {
        access_vm_reg(vcpu, p, r);
 
-       if (vcpu_has_cache_enabled(vcpu))       /* MMU+Caches enabled? */
+       if (vcpu_has_cache_enabled(vcpu)) {     /* MMU+Caches enabled? */
                vcpu->arch.hcr_el2 &= ~HCR_TVM;
+               stage2_flush_vm(vcpu->kvm);
+       }
 
        return true;
 }