Merge branch 'msr-bitmaps' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Radim Krčmář <rkrcmar@redhat.com>
Fri, 2 Feb 2018 17:26:58 +0000 (18:26 +0100)
committer Radim Krčmář <rkrcmar@redhat.com>
Fri, 9 Feb 2018 20:35:35 +0000 (21:35 +0100)
This topic branch allocates separate MSR bitmaps for each VCPU.
This is required for the IBRS enablement to choose, on a per-VM
basis, whether to intercept the SPEC_CTRL and PRED_CMD MSRs;
the IBRS enablement comes in through the tip tree.
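As a minimal sketch of why a per-VCPU bitmap helps (not KVM's code: msr_bitmap_allow() is a
hypothetical helper; the in-tree counterpart is roughly vmx_disable_intercept_for_msr()), the
snippet below clears the read and write intercept bits for one MSR in a private 4 KiB VMX MSR
bitmap page, using the layout described in the Intel SDM:

    #include <stdint.h>

    /* Stop VM exits for reads and writes of "msr" in this vCPU's bitmap page. */
    static void msr_bitmap_allow(uint8_t *bitmap, uint32_t msr)
    {
            uint32_t bit = msr & 0x1fff;    /* bit index within each 1 KiB region */

            if (msr <= 0x1fff) {
                    /* Low MSRs: read bits at offset 0x000, write bits at 0x800. */
                    bitmap[0x000 + bit / 8] &= (uint8_t)~(1u << (bit % 8));
                    bitmap[0x800 + bit / 8] &= (uint8_t)~(1u << (bit % 8));
            } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
                    /* High MSRs: read bits at offset 0x400, write bits at 0xc00. */
                    bitmap[0x400 + bit / 8] &= (uint8_t)~(1u << (bit % 8));
                    bitmap[0xc00 + bit / 8] &= (uint8_t)~(1u << (bit % 8));
            }
    }

With one such page owned by each vCPU, the intercepts for SPEC_CTRL (MSR 0x48) and PRED_CMD
(MSR 0x49) can be dropped only for guests that are actually given IBRS/IBPB, which is not
possible when all VCPUs share the old global legacy/longmode bitmaps.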

arch/x86/kvm/vmx.c

index bb5b4888505bdccc4a505aa1adcde95d9edd62f8,896af99a8606f3efc1f12f55515528120adcbcdb..9973a301364e0e0c8aa7d8be23f0c0d8e69ac3c7
@@@ -914,12 -953,8 +923,6 @@@ static DEFINE_PER_CPU(struct list_head
  static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
  
  enum {
-       VMX_MSR_BITMAP_LEGACY,
-       VMX_MSR_BITMAP_LONGMODE,
-       VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
-       VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
-       VMX_MSR_BITMAP_LEGACY_X2APIC,
-       VMX_MSR_BITMAP_LONGMODE_X2APIC,
 -      VMX_IO_BITMAP_A,
 -      VMX_IO_BITMAP_B,
        VMX_VMREAD_BITMAP,
        VMX_VMWRITE_BITMAP,
        VMX_BITMAP_NR
  };
  
  static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
  
- #define vmx_msr_bitmap_legacy                (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
- #define vmx_msr_bitmap_longmode              (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
- #define vmx_msr_bitmap_legacy_x2apic_apicv   (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
- #define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
- #define vmx_msr_bitmap_legacy_x2apic         (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
- #define vmx_msr_bitmap_longmode_x2apic       (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
 -#define vmx_io_bitmap_a                      (vmx_bitmap[VMX_IO_BITMAP_A])
 -#define vmx_io_bitmap_b                      (vmx_bitmap[VMX_IO_BITMAP_B])
  #define vmx_vmread_bitmap                    (vmx_bitmap[VMX_VMREAD_BITMAP])
  #define vmx_vmwrite_bitmap                   (vmx_bitmap[VMX_VMWRITE_BITMAP])
  
@@@ -6786,9 -6825,10 +6845,6 @@@ static __init int hardware_setup(void
        memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
        memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
  
-       memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-       memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
 -      memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
 -
 -      memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
--
        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
                goto out;
@@@ -10030,13 -10043,8 +10048,13 @@@ static inline bool nested_vmx_prepare_m
        int msr;
        struct page *page;
        unsigned long *msr_bitmap_l1;
-       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
+       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
  
 +      /* Nothing to do if the MSR bitmap is not in use.  */
 +      if (!cpu_has_vmx_msr_bitmap() ||
 +          !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
 +              return false;
 +
        /* This shortcut is ok because we support only x2APIC MSRs so far. */
        if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
                return false;
@@@ -10382,122 -10398,6 +10400,125 @@@ static void prepare_vmcs02_full(struct 
        vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
        vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
  
 +      vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
 +      vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
 +              vmcs12->guest_pending_dbg_exceptions);
 +      vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
 +      vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
 +
 +      if (nested_cpu_has_xsaves(vmcs12))
 +              vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
 +      vmcs_write64(VMCS_LINK_POINTER, -1ull);
 +
 +      if (cpu_has_vmx_posted_intr())
 +              vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
 +
 +      /*
 +       * Whether page-faults are trapped is determined by a combination of
 +       * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
 +       * If enable_ept, L0 doesn't care about page faults and we should
 +       * set all of these to L1's desires. However, if !enable_ept, L0 does
 +       * care about (at least some) page faults, and because it is not easy
 +       * (if at all possible?) to merge L0 and L1's desires, we simply ask
 +       * to exit on each and every L2 page fault. This is done by setting
 +       * MASK=MATCH=0 and (see below) EB.PF=1.
 +       * Note that below we don't need special code to set EB.PF beyond the
 +       * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
 +       * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
 +       * !enable_ept, EB.PF is 1, so the "or" will always be 1.
 +       */
 +      vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
 +              enable_ept ? vmcs12->page_fault_error_code_mask : 0);
 +      vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
 +              enable_ept ? vmcs12->page_fault_error_code_match : 0);
 +
 +      /* All VMFUNCs are currently emulated through L0 vmexits.  */
 +      if (cpu_has_vmx_vmfunc())
 +              vmcs_write64(VM_FUNCTION_CONTROL, 0);
 +
 +      if (cpu_has_vmx_apicv()) {
 +              vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
 +              vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
 +              vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
 +              vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
 +      }
 +
 +      /*
 +       * Set host-state according to L0's settings (vmcs12 is irrelevant here)
 +       * Some constant fields are set here by vmx_set_constant_host_state().
 +       * Other fields are different per CPU, and will be set later when
 +       * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
 +       */
 +      vmx_set_constant_host_state(vmx);
 +
 +      /*
 +       * Set the MSR load/store lists to match L0's settings.
 +       */
 +      vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
 +      vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
 +      vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
 +      vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
 +      vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
 +
 +      set_cr4_guest_host_mask(vmx);
 +
 +      if (vmx_mpx_supported())
 +              vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
 +
 +      if (enable_vpid) {
 +              if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
 +                      vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
 +              else
 +                      vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 +      }
 +
 +      /*
 +       * L1 may access the L2's PDPTR, so save them to construct vmcs12
 +       */
 +      if (enable_ept) {
 +              vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
 +              vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
 +              vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
 +              vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
 +      }
++
++      if (cpu_has_vmx_msr_bitmap())
++              vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
 +}
 +
 +/*
 + * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
 + * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
 + * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
 + * guest in a way that will both be appropriate to L1's requests, and our
 + * needs. In addition to modifying the active vmcs (which is vmcs02), this
 + * function also has additional necessary side-effects, like setting various
 + * vcpu->arch fields.
 + * Returns 0 on success, 1 on failure. Invalid state exit qualification code
 + * is assigned to entry_failure_code on failure.
 + */
 +static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 +                        bool from_vmentry, u32 *entry_failure_code)
 +{
 +      struct vcpu_vmx *vmx = to_vmx(vcpu);
 +      u32 exec_control, vmcs12_exec_ctrl;
 +
 +      /*
 +       * First, the fields that are shadowed.  This must be kept in sync
 +       * with vmx_shadow_fields.h.
 +       */
 +
 +      vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
 +      vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
 +      vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
 +      vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
 +      vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
 +
 +      /*
 +       * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
 +       * HOST_FS_BASE, HOST_GS_BASE.
 +       */
 +
        if (from_vmentry &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
                kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);