KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block
author Marc Zyngier <maz@kernel.org>
Fri, 2 Aug 2019 09:28:32 +0000 (10:28 +0100)
committer Marc Zyngier <maz@kernel.org>
Mon, 5 Aug 2019 14:36:46 +0000 (15:36 +0100)
Since commit 328e56647944 ("KVM: arm/arm64: vgic: Defer
touching GICH_VMCR to vcpu_load/put"), we leave ICH_VMCR_EL2 (or
its GICv2 equivalent) loaded as long as we can, only syncing it
back when we're scheduled out.

There is a small snag with that though: kvm_vgic_vcpu_pending_irq(),
which is indirectly called from kvm_vcpu_check_block(), needs to
evaluate the guest's view of ICC_PMR_EL1. At the point where we
call kvm_vcpu_check_block(), the vcpu is still loaded, and any
changes to PMR are not visible in memory until we do a vcpu_put().
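
For reference, the chain is kvm_vcpu_block() -> kvm_vcpu_check_block()
-> kvm_arch_vcpu_runnable(); the latter looks roughly like this on
arm/arm64 (a simplified sketch of virt/kvm/arm/arm.c, not the verbatim
source):

        int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
        {
                bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);

                /* kvm_vgic_vcpu_pending_irq() needs the guest's PMR */
                return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
                        && !v->arch.power_off && !v->arch.pause);
        }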

Things go really south if the guest does the following:

        mov x0, #0              // or any small value masking interrupts
        msr ICC_PMR_EL1, x0

        [vcpu preempted, then rescheduled, VMCR sampled]

        mov x0, #0xff           // allow all interrupts
        msr ICC_PMR_EL1, x0
        wfi                     // traps to EL2, but no sampling of VMCR

        [interrupt arrives just after WFI]

Here, the hypervisor's view of PMR is zero, while the guest has enabled
its interrupts. kvm_vgic_vcpu_pending_irq() will then say that no
interrupts are pending (despite an interrupt being received) and we'll
block for no reason. If the guest doesn't have a periodic interrupt
firing once it has blocked, it will stay there forever.
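
The culprit is the priority check in kvm_vgic_vcpu_pending_irq(),
which compares each pending interrupt against the PMR found in the
in-memory copy of VMCR. Roughly (a simplified sketch of the loop in
virt/kvm/arm/vgic/vgic.c, locking elided):

        struct vgic_vmcr vmcr;

        vgic_get_vmcr(vcpu, &vmcr);     /* stale while the vcpu is loaded */

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                pending = irq_is_pending(irq) && irq->enabled &&
                          !irq->active && irq->priority < vmcr.pmr;
                if (pending)
                        break;
        }

With vmcr.pmr stuck at the stale masking value, irq->priority < vmcr.pmr
never holds, and pending stays false.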

To avoid this unfortunate situation, let's resync VMCR from
kvm_arch_vcpu_blocking(), ensuring that a subsequent
kvm_vcpu_check_block() will observe the latest value of PMR.

This has been found by booting an arm64 Linux guest with the pseudo-NMI
feature, and thus using interrupt priorities to mask interrupts instead
of the usual PSTATE masking.

Cc: stable@vger.kernel.org # 4.12
Fixes: 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put")
Signed-off-by: Marc Zyngier <maz@kernel.org>
include/kvm/arm_vgic.h
virt/kvm/arm/arm.c
virt/kvm/arm/vgic/vgic-v2.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 46bbc949c20a0c8bfd89f24665b7b5c169e4d4ea..7a30524a80ee8793b446d745c225c5864f50cdc7 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -350,6 +350,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 
 void kvm_vgic_load(struct kvm_vcpu *vcpu);
 void kvm_vgic_put(struct kvm_vcpu *vcpu);
+void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu);
 
 #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)    ((k)->arch.vgic.initialized)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index c704fa6961844b924ea1a2e2a43382ad16773ed8..482b20256fa831faa1d9b5402dd533e473b4b627 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -323,6 +323,17 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
+       /*
+        * If we're about to block (most likely because we've just hit a
+        * WFI), we need to sync back the state of the GIC CPU interface
+        * so that we have the latest PMR and group enables. This ensures
+        * that kvm_arch_vcpu_runnable has up-to-date data to decide
+        * whether we have pending interrupts.
+        */
+       preempt_disable();
+       kvm_vgic_vmcr_sync(vcpu);
+       preempt_enable();
+
        kvm_vgic_v4_enable_doorbell(vcpu);
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 6dd5ad706c927190152eb979dc2498b75d03a809..96aab77d04718a00164abd251566aaa8e2167b48 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -484,10 +484,17 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
                       kvm_vgic_global_state.vctrl_base + GICH_APR);
 }
 
-void vgic_v2_put(struct kvm_vcpu *vcpu)
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
 {
        struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
 
        cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
+}
+
+void vgic_v2_put(struct kvm_vcpu *vcpu)
+{
+       struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+
+       vgic_v2_vmcr_sync(vcpu);
        cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
 }
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index c2c9ce009f6381556cf71f520c680af78fae4525..0c653a1e5215280f4076db92fdb1db61b7b01386 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -662,12 +662,17 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
                __vgic_v3_activate_traps(vcpu);
 }
 
-void vgic_v3_put(struct kvm_vcpu *vcpu)
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
 {
        struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 
        if (likely(cpu_if->vgic_sre))
                cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
+}
+
+void vgic_v3_put(struct kvm_vcpu *vcpu)
+{
+       vgic_v3_vmcr_sync(vcpu);
 
        kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
 
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 04786c8ec77ed8e4a48b085c029ba2d03a008d4a..13d4b38a94ec4b81af13cd739497ae6db9640a55 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -919,6 +919,17 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
                vgic_v3_put(vcpu);
 }
 
+void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
+{
+       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+               return;
+
+       if (kvm_vgic_global_state.type == VGIC_V2)
+               vgic_v2_vmcr_sync(vcpu);
+       else
+               vgic_v3_vmcr_sync(vcpu);
+}
+
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 57205beaa981a11366dec8cf2ac1553e7056c271..11adbdac1d56177bcab9aa471e1f5bb7307906d7 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -193,6 +193,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
 void vgic_v2_init_lrs(void);
 void vgic_v2_load(struct kvm_vcpu *vcpu);
 void vgic_v2_put(struct kvm_vcpu *vcpu);
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
 
 void vgic_v2_save_state(struct kvm_vcpu *vcpu);
 void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
@@ -223,6 +224,7 @@ bool vgic_v3_check_base(struct kvm *kvm);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
 void vgic_v3_put(struct kvm_vcpu *vcpu);
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
 
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);