kvm/x86: per-vcpu apicv deactivation support
author Andrey Smetanin <asmetanin@virtuozzo.com>
Tue, 10 Nov 2015 12:36:33 +0000 (15:36 +0300)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 25 Nov 2015 16:24:21 +0000 (17:24 +0100)
The decision on whether to use hardware APIC virtualization used to be
taken globally, based on the availability of the feature in the CPU
and the value of a module parameter.

However, under certain circumstances we want to control it on a
per-vcpu basis.  In particular, when userspace activates the Hyper-V
synthetic interrupt controller (SynIC), APICv has to be disabled
because it is incompatible with SynIC's auto-EOI behavior.

To achieve that, introduce an 'apicv_active' flag in struct
kvm_vcpu_arch, and a kvm_vcpu_deactivate_apicv() function to turn APICv
off.  The flag is initialized based on the module parameter and the CPU
capability, and consulted whenever an APICv-specific action is
performed.
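
For illustration, a later consumer such as the Hyper-V SynIC code would
use the new helper roughly as below.  This is a hedged sketch, not part
of this patch: the kvm_hv_activate_synic() name is hypothetical, and the
surrounding kernel context is assumed.

    #include <linux/kvm_host.h>

    /* Declared in arch/x86/include/asm/kvm_host.h by this patch. */
    void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);

    static int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
    {
            /*
             * SynIC auto-EOI cannot coexist with hardware APIC
             * virtualization, so turn APICv off for this vcpu only.
             * Other vcpus keep the value seeded from
             * kvm_x86_ops->get_enable_apicv() at vcpu init.
             */
            kvm_vcpu_deactivate_apicv(vcpu);
            return 0;
    }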

Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
Reviewed-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Gleb Natapov <gleb@kernel.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: Roman Kagan <rkagan@virtuozzo.com>
CC: Denis V. Lunev <den@openvz.org>
CC: qemu-devel@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/irq.c
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f6d8894f25b4c0c2959a9b469d1c42bd0b985001..bac0d540f49c6cd59b70a01c2b2668a11ebf66ce 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -400,6 +400,7 @@ struct kvm_vcpu_arch {
        u64 efer;
        u64 apic_base;
        struct kvm_lapic *apic;    /* kernel irqchip context */
+       bool apicv_active;
        DECLARE_BITMAP(ioapic_handled_vectors, 256);
        unsigned long apic_attention;
        int32_t apic_arb_prio;
@@ -831,7 +832,8 @@ struct kvm_x86_ops {
        void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
        void (*enable_irq_window)(struct kvm_vcpu *vcpu);
        void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-       int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu);
+       bool (*get_enable_apicv)(void);
+       void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
        void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
        void (*hwapic_isr_update)(struct kvm *kvm, int isr);
        void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
@@ -1086,6 +1088,8 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
                                struct x86_exception *exception);
 
+void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 097060e33bd6a67b0743a4cb3918b1baef765496..3982b479bb5fe46ae147b01d0cf91933ea8be455 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -76,7 +76,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
        if (kvm_cpu_has_extint(v))
                return 1;
 
-       if (kvm_vcpu_apic_vid_enabled(v))
+       if (kvm_vcpu_apicv_active(v))
                return 0;
 
        return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9469d453abc8026746e58b37f6a3f2e1057bf3b6..618a20d5ca99fffb2d09198dca8aa6e61620ac85 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -379,7 +379,8 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
        if (!apic->irr_pending)
                return -1;
 
-       kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
+       if (apic->vcpu->arch.apicv_active)
+               kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
        result = apic_search_irr(apic);
        ASSERT(result == -1 || result >= 16);
 
@@ -392,7 +393,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 
        vcpu = apic->vcpu;
 
-       if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) {
+       if (unlikely(vcpu->arch.apicv_active)) {
                /* try to update RVI */
                apic_clear_vector(vec, apic->regs + APIC_IRR);
                kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -418,7 +419,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
         * because the processor can modify ISR under the hood.  Instead
         * just set SVI.
         */
-       if (unlikely(kvm_x86_ops->hwapic_isr_update))
+       if (unlikely(vcpu->arch.apicv_active))
                kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
        else {
                ++apic->isr_count;
@@ -466,7 +467,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
         * on the other hand isr_count and highest_isr_cache are unused
         * and must be left alone.
         */
-       if (unlikely(kvm_x86_ops->hwapic_isr_update))
+       if (unlikely(vcpu->arch.apicv_active))
                kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
                                               apic_find_highest_isr(apic));
        else {
@@ -852,7 +853,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                                apic_clear_vector(vector, apic->regs + APIC_TMR);
                }
 
-               if (kvm_x86_ops->deliver_posted_interrupt)
+               if (vcpu->arch.apicv_active)
                        kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
                else {
                        apic_set_irr(vector, apic);
@@ -1225,7 +1226,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
                int vec = reg & APIC_VECTOR_MASK;
                void *bitmap = apic->regs + APIC_ISR;
 
-               if (kvm_x86_ops->deliver_posted_interrupt)
+               if (vcpu->arch.apicv_active)
                        bitmap = apic->regs + APIC_IRR;
 
                if (apic_test_vector(vec, bitmap))
@@ -1693,8 +1694,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
                apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
                apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
        }
-       apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu);
-       apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
+       apic->irr_pending = vcpu->arch.apicv_active;
+       apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
        apic->highest_isr_cache = -1;
        update_divide_count(apic);
        atomic_set(&apic->lapic_timer.pending, 0);
@@ -1906,15 +1907,15 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
        update_divide_count(apic);
        start_apic_timer(apic);
        apic->irr_pending = true;
-       apic->isr_count = kvm_x86_ops->hwapic_isr_update ?
+       apic->isr_count = vcpu->arch.apicv_active ?
                                1 : count_vectors(apic->regs + APIC_ISR);
        apic->highest_isr_cache = -1;
-       if (kvm_x86_ops->hwapic_irr_update)
+       if (vcpu->arch.apicv_active) {
                kvm_x86_ops->hwapic_irr_update(vcpu,
                                apic_find_highest_irr(apic));
-       if (unlikely(kvm_x86_ops->hwapic_isr_update))
                kvm_x86_ops->hwapic_isr_update(vcpu->kvm,
                                apic_find_highest_isr(apic));
+       }
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        if (ioapic_in_kernel(vcpu->kvm))
                kvm_rtc_eoi_tracking_restore_one(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index fde8e35d585050e7e2d26b9df326e7542b3d7f38..5fc60e4bb4e233feb896d03b356f41a8a41e3f03 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -143,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
        return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
 }
 
-static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu)
 {
-       return kvm_x86_ops->cpu_uses_apicv(vcpu);
+       return vcpu->arch.apic && vcpu->arch.apicv_active;
 }
 
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ebb76e8a91e1a447460deff5cd7c441ebc865bf5..2401fc88905b3f5369d353e41039ef8553e94d8b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3559,9 +3559,13 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
        return;
 }
 
-static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
+static bool svm_get_enable_apicv(void)
+{
+       return false;
+}
+
+static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
-       return 0;
 }
 
 static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
@@ -4328,7 +4332,8 @@ static struct kvm_x86_ops svm_x86_ops = {
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
        .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
-       .cpu_uses_apicv = svm_cpu_uses_apicv,
+       .get_enable_apicv = svm_get_enable_apicv,
+       .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
        .load_eoi_exitmap = svm_load_eoi_exitmap,
        .sync_pir_to_irr = svm_sync_pir_to_irr,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c8a87c94dc81f35062ea19e48b1819d27ff91ee9..1a8bfaab89c7ca5497a86d5856472955f1721321 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -19,6 +19,7 @@
 #include "irq.h"
 #include "mmu.h"
 #include "cpuid.h"
+#include "lapic.h"
 
 #include <linux/kvm_host.h>
 #include <linux/module.h>
@@ -862,7 +863,6 @@ static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
-static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
@@ -870,7 +870,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
-static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
 static int alloc_identity_pagetable(struct kvm *kvm);
@@ -2498,7 +2497,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
        vmx->nested.nested_vmx_pinbased_ctls_high |=
                PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
                PIN_BASED_VMX_PREEMPTION_TIMER;
-       if (vmx_cpu_uses_apicv(&vmx->vcpu))
+       if (kvm_vcpu_apicv_active(&vmx->vcpu))
                vmx->nested.nested_vmx_pinbased_ctls_high |=
                        PIN_BASED_POSTED_INTR;
 
@@ -4462,9 +4461,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
                        msr, MSR_TYPE_W);
 }
 
-static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu)
+static bool vmx_get_enable_apicv(void)
 {
-       return enable_apicv && lapic_in_kernel(vcpu);
+       return enable_apicv;
 }
 
 static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
@@ -4586,11 +4585,6 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
        kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
 }
 
-static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu)
-{
-       return;
-}
-
 /*
  * Set up the vmcs's constant host-state fields, i.e., host-state fields that
  * will not change in the lifetime of the guest.
@@ -4660,11 +4654,18 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 {
        u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
 
-       if (!vmx_cpu_uses_apicv(&vmx->vcpu))
+       if (!kvm_vcpu_apicv_active(&vmx->vcpu))
                pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
        return pin_based_exec_ctrl;
 }
 
+static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+}
+
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 {
        u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
@@ -4703,7 +4704,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
                exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
        if (!ple_gap)
                exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-       if (!vmx_cpu_uses_apicv(&vmx->vcpu))
+       if (!kvm_vcpu_apicv_active(&vmx->vcpu))
                exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
        exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
@@ -4767,7 +4768,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
                                vmx_secondary_exec_control(vmx));
 
-       if (vmx_cpu_uses_apicv(&vmx->vcpu)) {
+       if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
                vmcs_write64(EOI_EXIT_BITMAP0, 0);
                vmcs_write64(EOI_EXIT_BITMAP1, 0);
                vmcs_write64(EOI_EXIT_BITMAP2, 0);
@@ -4919,7 +4920,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
        kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-       if (vmx_cpu_uses_apicv(vcpu))
+       if (kvm_vcpu_apicv_active(vcpu))
                memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
 
        if (vmx->vpid != 0)
@@ -6203,15 +6204,6 @@ static __init int hardware_setup(void)
                kvm_tsc_scaling_ratio_frac_bits = 48;
        }
 
-       if (enable_apicv)
-               kvm_x86_ops->update_cr8_intercept = NULL;
-       else {
-               kvm_x86_ops->hwapic_irr_update = NULL;
-               kvm_x86_ops->hwapic_isr_update = NULL;
-               kvm_x86_ops->deliver_posted_interrupt = NULL;
-               kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-       }
-
        vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
        vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
        vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -8152,7 +8144,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
         * apicv
         */
        if (!cpu_has_vmx_virtualize_x2apic_mode() ||
-                               !vmx_cpu_uses_apicv(vcpu))
+                               !kvm_vcpu_apicv_active(vcpu))
                return;
 
        if (!cpu_need_tpr_shadow(vcpu))
@@ -8259,7 +8251,7 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
-       if (!vmx_cpu_uses_apicv(vcpu))
+       if (!kvm_vcpu_apicv_active(vcpu))
                return;
 
        vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
@@ -10803,7 +10795,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .update_cr8_intercept = update_cr8_intercept,
        .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
        .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
-       .cpu_uses_apicv = vmx_cpu_uses_apicv,
+       .get_enable_apicv = vmx_get_enable_apicv,
+       .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
        .load_eoi_exitmap = vmx_load_eoi_exitmap,
        .hwapic_irr_update = vmx_hwapic_irr_update,
        .hwapic_isr_update = vmx_hwapic_isr_update,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9c69337a3d61494ddb1b1a19781c40810898c390..f0250a092ef311893a6487afa90f901ef74e36dc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2748,7 +2748,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
 {
-       kvm_x86_ops->sync_pir_to_irr(vcpu);
+       if (vcpu->arch.apicv_active)
+               kvm_x86_ops->sync_pir_to_irr(vcpu);
+
        memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 
        return 0;
@@ -5867,6 +5869,12 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
        kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
+void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.apicv_active = false;
+       kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
        unsigned long nr, a0, a1, a2, a3, ret;
@@ -5960,6 +5968,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
        if (!vcpu->arch.apic)
                return;
 
+       if (vcpu->arch.apicv_active)
+               return;
+
        if (!vcpu->arch.apic->vapic_addr)
                max_irr = kvm_lapic_find_highest_irr(vcpu);
        else
@@ -6306,7 +6317,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
        if (irqchip_split(vcpu->kvm))
                kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
        else {
-               kvm_x86_ops->sync_pir_to_irr(vcpu);
+               if (vcpu->arch.apicv_active)
+                       kvm_x86_ops->sync_pir_to_irr(vcpu);
                kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
        }
        kvm_x86_ops->load_eoi_exitmap(vcpu,
@@ -6453,7 +6465,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                 * Update architecture specific hints for APIC
                 * virtual interrupt delivery.
                 */
-               if (kvm_x86_ops->hwapic_irr_update)
+               if (vcpu->arch.apicv_active)
                        kvm_x86_ops->hwapic_irr_update(vcpu,
                                kvm_lapic_find_highest_irr(vcpu));
        }
@@ -7524,6 +7536,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        BUG_ON(vcpu->kvm == NULL);
        kvm = vcpu->kvm;
 
+       vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
        vcpu->arch.pv.pv_unhalted = false;
        vcpu->arch.emulate_ctxt.ops = &emulate_ops;
        if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))