KVM: VMX: Handle single-step #DB for EMULTYPE_SKIP on EPT misconfig

author Sean Christopherson <sean.j.christopherson@intel.com>
Tue, 27 Aug 2019 21:40:39 +0000 (14:40 -0700)
committer Paolo Bonzini <pbonzini@redhat.com>
Tue, 24 Sep 2019 12:34:08 +0000 (14:34 +0200)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c

index cb7ad362e9b9d731bb43852b030ebbfe820929e8..ce75296b1b1073adf0d187ed12dcf7e592dea87c 100644 (file)
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -777,14 +777,15 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
                 svm->next_rip = svm->vmcb->control.next_rip;
         }
  
-       if (!svm->next_rip)
-               return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
-
-       if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
-               printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
-                      __func__, kvm_rip_read(vcpu), svm->next_rip);
-
-       kvm_rip_write(vcpu, svm->next_rip);
+       if (!svm->next_rip) {
+               if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+                       return 0;
+       } else {
+               if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
+                       pr_err("%s: ip 0x%lx next 0x%llx\n",
+                              __func__, kvm_rip_read(vcpu), svm->next_rip);
+               kvm_rip_write(vcpu, svm->next_rip);
+       }
         svm_set_interrupt_shadow(vcpu, 0);
  
         return 1;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c

index e71dc36850cbebb1b102e67d0b095cfe54620a0e..ef98311ad15356741527704d2dfaf790dcd2cef6 100644 (file)
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1501,17 +1501,27 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
         return 0;
  }
  
-/*
- * Returns an int to be compatible with SVM implementation (which can fail).
- * Do not use directly, use skip_emulated_instruction() instead.
- */
-static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
+static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
  {
         unsigned long rip;
  
-       rip = kvm_rip_read(vcpu);
-       rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-       kvm_rip_write(vcpu, rip);
+       /*
+        * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
+        * undefined behavior: Intel's SDM doesn't mandate that the VMCS field
+        * be set when EPT misconfig occurs.  Real hardware updates it in
+        * practice, but other hypervisors (namely Hyper-V) don't, in which
+        * case RIP would be advanced by a garbage value.  So when running on
+        * a hypervisor, skip the instruction via emulation on EPT misconfig.
+        */
+       if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
+           to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
+               rip = kvm_rip_read(vcpu);
+               rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+               kvm_rip_write(vcpu, rip);
+       } else {
+               if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+                       return 0;
+       }
  
         /* skipping an emulated instruction also counts */
         vmx_set_interrupt_shadow(vcpu, 0);
@@ -1519,11 +1529,6 @@ static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
         return 1;
  }
  
-static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
-{
-       (void)__skip_emulated_instruction(vcpu);
-}
-
  static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
  {
         /*
@@ -4587,7 +4592,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
                         vcpu->arch.dr6 &= ~DR_TRAP_BITS;
                         vcpu->arch.dr6 |= dr6 | DR6_RTM;
                         if (is_icebp(intr_info))
-                               skip_emulated_instruction(vcpu);
+                               WARN_ON(!skip_emulated_instruction(vcpu));
  
                         kvm_queue_exception(vcpu, DB_VECTOR);
                         return 1;
@@ -5068,7 +5073,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
         if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
                        type != INTR_TYPE_EXT_INTR &&
                        type != INTR_TYPE_NMI_INTR))
-               skip_emulated_instruction(vcpu);
+               WARN_ON(!skip_emulated_instruction(vcpu));
  
         /*
          * TODO: What about debug traps on tss switch?
@@ -5135,20 +5140,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
         if (!is_guest_mode(vcpu) &&
             !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
                 trace_kvm_fast_mmio(gpa);
-               /*
-                * Doing kvm_skip_emulated_instruction() depends on undefined
-                * behavior: Intel's manual doesn't mandate
-                * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
-                * occurs and while on real hardware it was observed to be set,
-                * other hypervisors (namely Hyper-V) don't set it, we end up
-                * advancing IP with some random value. Disable fast mmio when
-                * running nested and keep it for real hardware in hope that
-                * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
-                */
-               if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
-                       return kvm_skip_emulated_instruction(vcpu);
-               else
-                       return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
+               return kvm_skip_emulated_instruction(vcpu);
         }
  
         return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -7722,7 +7714,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
  
         .run = vmx_vcpu_run,
         .handle_exit = vmx_handle_exit,
-       .skip_emulated_instruction = __skip_emulated_instruction,
+       .skip_emulated_instruction = skip_emulated_instruction,
         .set_interrupt_shadow = vmx_set_interrupt_shadow,
         .get_interrupt_shadow = vmx_get_interrupt_shadow,
         .patch_hypercall = vmx_patch_hypercall,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index a83b269126a09941cf22c351e07512e6645b5997..c38d247dbffbe4d8b799554bea926a32aedb2b5d 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6657,11 +6657,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                 return 1;
         }
  
+       /*
+        * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
+        * for kvm_skip_emulated_instruction().  The caller is responsible for
+        * updating interruptibility state and injecting single-step #DBs.
+        */
         if (emulation_type & EMULTYPE_SKIP) {
                 kvm_rip_write(vcpu, ctxt->_eip);
                 if (ctxt->eflags & X86_EFLAGS_RF)
                         kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
-               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
                 return 1;
         }
author	Sean Christopherson <sean.j.christopherson@intel.com>
	Tue, 27 Aug 2019 21:40:39 +0000 (14:40 -0700)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Tue, 24 Sep 2019 12:34:08 +0000 (14:34 +0200)
arch/x86/kvm/svm.c		patch | blob | history
arch/x86/kvm/vmx/vmx.c		patch | blob | history
arch/x86/kvm/x86.c		patch | blob | history