KVM: x86: Omit caching logic for always-available GPRs
authorSean Christopherson <sean.j.christopherson@intel.com>
Tue, 30 Apr 2019 17:36:17 +0000 (10:36 -0700)
committerPaolo Bonzini <pbonzini@redhat.com>
Tue, 30 Apr 2019 19:56:12 +0000 (21:56 +0200)
Except for RSP and RIP, which are held in VMX's VMCS, GPRs are always
treated "available and dirtly" on both VMX and SVM, i.e. are
unconditionally loaded/saved immediately before/after VM-Enter/VM-Exit.

Eliminating the unnecessary caching code reduces the size of KVM by a
non-trivial amount, much of which comes from the most common code paths.
E.g. on x86_64, kvm_emulate_cpuid() is reduced from 342 to 182 bytes and
kvm_emulate_hypercall() from 1362 to 1143, with the total size of KVM
dropping by ~1000 bytes.  With CONFIG_RETPOLINE=y, the numbers are even
more pronounced, e.g.: 353->182, 1418->1172 and well over 2000 bytes.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/kvm_cache_regs.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c

index fd3951638ae45aebcc2c9ecaa0ee5e100b41e741..fc8ccf596624072240fd062f5b9050bc2d745835 100644 (file)
@@ -962,13 +962,13 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
        if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
                return 1;
 
-       eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
-       ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+       eax = kvm_rax_read(vcpu);
+       ecx = kvm_rcx_read(vcpu);
        kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
-       kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
-       kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
-       kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
-       kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+       kvm_rax_write(vcpu, eax);
+       kvm_rbx_write(vcpu, ebx);
+       kvm_rcx_write(vcpu, ecx);
+       kvm_rdx_write(vcpu, edx);
        return kvm_skip_emulated_instruction(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
index 421899f6ad7bfe28237f37bcee6fbee0485eb510..7868daceb25b3f5ddc3fedd9c9d2fa456dd00f88 100644 (file)
@@ -1526,10 +1526,10 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 
        longmode = is_64_bit_mode(vcpu);
        if (longmode)
-               kvm_register_write(vcpu, VCPU_REGS_RAX, result);
+               kvm_rax_write(vcpu, result);
        else {
-               kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
-               kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
+               kvm_rdx_write(vcpu, result >> 32);
+               kvm_rax_write(vcpu, result & 0xffffffff);
        }
 }
 
@@ -1602,18 +1602,18 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
        longmode = is_64_bit_mode(vcpu);
 
        if (!longmode) {
-               param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
-                       (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
-               ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
-                       (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
-               outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
-                       (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+               param = ((u64)kvm_rdx_read(vcpu) << 32) |
+                       (kvm_rax_read(vcpu) & 0xffffffff);
+               ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
+                       (kvm_rcx_read(vcpu) & 0xffffffff);
+               outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
+                       (kvm_rsi_read(vcpu) & 0xffffffff);
        }
 #ifdef CONFIG_X86_64
        else {
-               param = kvm_register_read(vcpu, VCPU_REGS_RCX);
-               ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
-               outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+               param = kvm_rcx_read(vcpu);
+               ingpa = kvm_rdx_read(vcpu);
+               outgpa = kvm_r8_read(vcpu);
        }
 #endif
 
index f8f56a93358ba0d53b3fa509f6129911c518810e..d179b7d7860df8121e286665bb9a8bff862ddd08 100644 (file)
@@ -9,6 +9,34 @@
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
         | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
 
+#define BUILD_KVM_GPR_ACCESSORS(lname, uname)                                \
+static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
+{                                                                            \
+       return vcpu->arch.regs[VCPU_REGS_##uname];                            \
+}                                                                            \
+static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,       \
+                                               unsigned long val)            \
+{                                                                            \
+       vcpu->arch.regs[VCPU_REGS_##uname] = val;                             \
+}
+BUILD_KVM_GPR_ACCESSORS(rax, RAX)
+BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
+BUILD_KVM_GPR_ACCESSORS(rcx, RCX)
+BUILD_KVM_GPR_ACCESSORS(rdx, RDX)
+BUILD_KVM_GPR_ACCESSORS(rbp, RBP)
+BUILD_KVM_GPR_ACCESSORS(rsi, RSI)
+BUILD_KVM_GPR_ACCESSORS(rdi, RDI)
+#ifdef CONFIG_X86_64
+BUILD_KVM_GPR_ACCESSORS(r8,  R8)
+BUILD_KVM_GPR_ACCESSORS(r9,  R9)
+BUILD_KVM_GPR_ACCESSORS(r10, R10)
+BUILD_KVM_GPR_ACCESSORS(r11, R11)
+BUILD_KVM_GPR_ACCESSORS(r12, R12)
+BUILD_KVM_GPR_ACCESSORS(r13, R13)
+BUILD_KVM_GPR_ACCESSORS(r14, R14)
+BUILD_KVM_GPR_ACCESSORS(r15, R15)
+#endif
+
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
                                              enum kvm_reg reg)
 {
@@ -83,8 +111,8 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
 
 static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 {
-       return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
-               | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
+       return (kvm_rax_read(vcpu) & -1u)
+               | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32);
 }
 
 static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
index 6e374a0e3bc377d69a3acd31e7836ff4716b28e0..38aef34397992aaaaf1db127ae1cb0c69d3bca2d 100644 (file)
@@ -2091,7 +2091,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        init_vmcb(svm);
 
        kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
-       kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
+       kvm_rdx_write(vcpu, eax);
 
        if (kvm_vcpu_apicv_active(vcpu) && !init_event)
                avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
@@ -3388,7 +3388,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        } else {
                (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
        }
-       kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
+       kvm_rax_write(&svm->vcpu, hsave->save.rax);
        kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
        kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
        svm->vmcb->save.dr7 = 0;
@@ -3496,7 +3496,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
        kvm_mmu_reset_context(&svm->vcpu);
 
        svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
-       kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
+       kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
        kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
        kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
 
@@ -3787,11 +3787,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
 
-       trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
-                         kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+       trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu),
+                         kvm_rax_read(&svm->vcpu));
 
        /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
-       kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+       kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3799,7 +3799,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
 
 static int skinit_interception(struct vcpu_svm *svm)
 {
-       trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+       trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu));
 
        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
@@ -3813,7 +3813,7 @@ static int wbinvd_interception(struct vcpu_svm *svm)
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
        u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
-       u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+       u32 index = kvm_rcx_read(&svm->vcpu);
 
        if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
                svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
@@ -4209,7 +4209,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-       u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+       u32 ecx = kvm_rcx_read(&svm->vcpu);
        struct msr_data msr_info;
 
        msr_info.index = ecx;
@@ -4221,10 +4221,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
        } else {
                trace_kvm_msr_read(ecx, msr_info.data);
 
-               kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
-                                  msr_info.data & 0xffffffff);
-               kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
-                                  msr_info.data >> 32);
+               kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff);
+               kvm_rdx_write(&svm->vcpu, msr_info.data >> 32);
                svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
                return kvm_skip_emulated_instruction(&svm->vcpu);
        }
@@ -4418,7 +4416,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
        struct msr_data msr;
-       u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+       u32 ecx = kvm_rcx_read(&svm->vcpu);
        u64 data = kvm_read_edx_eax(&svm->vcpu);
 
        msr.data = data;
index 8641c99a3255b949ed0a72937520eb6bdb44ea82..62924c5d1c060705de6e04afac98f416a89ff92b 100644 (file)
@@ -4926,7 +4926,7 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
 static int handle_xsetbv(struct kvm_vcpu *vcpu)
 {
        u64 new_bv = kvm_read_edx_eax(vcpu);
-       u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+       u32 index = kvm_rcx_read(vcpu);
 
        if (kvm_set_xcr(vcpu, index, new_bv) == 0)
                return kvm_skip_emulated_instruction(vcpu);
index 044ece3a28a44e6b1632434fac8be281482a17c4..b352a7c137cd0dfdda1c26d15a1e0634a3ec16f3 100644 (file)
@@ -1096,15 +1096,15 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
 
 bool kvm_rdpmc(struct kvm_vcpu *vcpu)
 {
-       u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+       u32 ecx = kvm_rcx_read(vcpu);
        u64 data;
        int err;
 
        err = kvm_pmu_rdpmc(vcpu, ecx, &data);
        if (err)
                return err;
-       kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
-       kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
+       kvm_rax_write(vcpu, (u32)data);
+       kvm_rdx_write(vcpu, data >> 32);
        return err;
 }
 EXPORT_SYMBOL_GPL(kvm_rdpmc);
@@ -6586,7 +6586,7 @@ static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
                            unsigned short port)
 {
-       unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+       unsigned long val = kvm_rax_read(vcpu);
        int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
                                            size, port, &val, 1);
 
@@ -6610,8 +6610,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
        }
 
        /* For size less than 4 we merge, else we zero extend */
-       val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
-                                       : 0;
+       val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
 
        /*
         * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
@@ -6619,7 +6618,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
         */
        emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
                                 vcpu->arch.pio.port, &val, 1);
-       kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+       kvm_rax_write(vcpu, val);
 
        return kvm_skip_emulated_instruction(vcpu);
 }
@@ -6631,12 +6630,12 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
        int ret;
 
        /* For size less than 4 we merge, else we zero extend */
-       val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
+       val = (size < 4) ? kvm_rax_read(vcpu) : 0;
 
        ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
                                       &val, 1);
        if (ret) {
-               kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+               kvm_rax_write(vcpu, val);
                return ret;
        }
 
@@ -7151,11 +7150,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        if (kvm_hv_hypercall_enabled(vcpu->kvm))
                return kvm_hv_hypercall(vcpu);
 
-       nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
-       a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
-       a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
-       a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
-       a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
+       nr = kvm_rax_read(vcpu);
+       a0 = kvm_rbx_read(vcpu);
+       a1 = kvm_rcx_read(vcpu);
+       a2 = kvm_rdx_read(vcpu);
+       a3 = kvm_rsi_read(vcpu);
 
        trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
@@ -7196,7 +7195,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 out:
        if (!op_64_bit)
                ret = (u32)ret;
-       kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+       kvm_rax_write(vcpu, ret);
 
        ++vcpu->stat.hypercalls;
        return kvm_skip_emulated_instruction(vcpu);
@@ -8285,23 +8284,23 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
                emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
        }
-       regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
-       regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
-       regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
-       regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
-       regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
-       regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
+       regs->rax = kvm_rax_read(vcpu);
+       regs->rbx = kvm_rbx_read(vcpu);
+       regs->rcx = kvm_rcx_read(vcpu);
+       regs->rdx = kvm_rdx_read(vcpu);
+       regs->rsi = kvm_rsi_read(vcpu);
+       regs->rdi = kvm_rdi_read(vcpu);
        regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
-       regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
+       regs->rbp = kvm_rbp_read(vcpu);
 #ifdef CONFIG_X86_64
-       regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
-       regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
-       regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
-       regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
-       regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
-       regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
-       regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
-       regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
+       regs->r8 = kvm_r8_read(vcpu);
+       regs->r9 = kvm_r9_read(vcpu);
+       regs->r10 = kvm_r10_read(vcpu);
+       regs->r11 = kvm_r11_read(vcpu);
+       regs->r12 = kvm_r12_read(vcpu);
+       regs->r13 = kvm_r13_read(vcpu);
+       regs->r14 = kvm_r14_read(vcpu);
+       regs->r15 = kvm_r15_read(vcpu);
 #endif
 
        regs->rip = kvm_rip_read(vcpu);
@@ -8321,23 +8320,23 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
        vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 
-       kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
-       kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
-       kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
-       kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
-       kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
-       kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
+       kvm_rax_write(vcpu, regs->rax);
+       kvm_rbx_write(vcpu, regs->rbx);
+       kvm_rcx_write(vcpu, regs->rcx);
+       kvm_rdx_write(vcpu, regs->rdx);
+       kvm_rsi_write(vcpu, regs->rsi);
+       kvm_rdi_write(vcpu, regs->rdi);
        kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
-       kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
+       kvm_rbp_write(vcpu, regs->rbp);
 #ifdef CONFIG_X86_64
-       kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
-       kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
-       kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
-       kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
-       kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
-       kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
-       kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
-       kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
+       kvm_r8_write(vcpu, regs->r8);
+       kvm_r9_write(vcpu, regs->r9);
+       kvm_r10_write(vcpu, regs->r10);
+       kvm_r11_write(vcpu, regs->r11);
+       kvm_r12_write(vcpu, regs->r12);
+       kvm_r13_write(vcpu, regs->r13);
+       kvm_r14_write(vcpu, regs->r14);
+       kvm_r15_write(vcpu, regs->r15);
 #endif
 
        kvm_rip_write(vcpu, regs->rip);