KVM: PPC: Book3S HV: Streamlined guest entry/exit path on P9 for radix guests

author Paul Mackerras <paulus@ozlabs.org>

Mon, 8 Oct 2018 05:30:55 +0000 (16:30 +1100)

committer Michael Ellerman <mpe@ellerman.id.au>

Tue, 9 Oct 2018 05:04:27 +0000 (16:04 +1100)
author Paul Mackerras <paulus@ozlabs.org>
Mon, 8 Oct 2018 05:30:55 +0000 (16:30 +1100)
committer Michael Ellerman <mpe@ellerman.id.au>
Tue, 9 Oct 2018 05:04:27 +0000 (16:04 +1100)
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h

index 0c1a2b01512aa6b05c38c69742d644e25ac39a09..5c9b00cec4f2b9943f2168ffc93ec4f842fbed49 100644 (file)
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -165,4 +165,6 @@ void kvmhv_load_host_pmu(void);
  void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
  void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
  
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+
  #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h

index 03a60f76f3d73bfa9fb3ac64dac4f3b537b7af0c..88362ccda5498110a04de2a16cbe8fdfe4cd5efe 100644 (file)
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -583,6 +583,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
  
  extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
                                int level, bool line_status);
+extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
  #else
  static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
                                        u32 priority) { return -1; }
@@ -605,6 +606,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
  
  static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
                                       int level, bool line_status) { return -ENODEV; }
+static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
  #endif /* CONFIG_KVM_XIVE */
  
  /*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index 0e17593ee6e3d357b039f81d8f862a9695bc7f66..4befa5a812feded0fc2636467f9bc22051da567b 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3079,6 +3079,273 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
         trace_kvmppc_run_core(vc, 1);
  }
  
+/*
+ * Load up hypervisor-mode registers on P9.
+ *
+ * Saves the host HFSCR, CIABR, DAWR, DAWRX, PSSCR and PID in locals,
+ * programs HDEC from time_limit, loads the guest/vcore values of the
+ * registers handled here and calls __kvmhv_vcpu_entry_p9().  On return
+ * the guest values are captured back into vcpu/vc and the host values
+ * are written back.
+ *
+ * Returns the value returned by __kvmhv_vcpu_entry_p9(), or
+ * BOOK3S_INTERRUPT_HV_DECREMENTER before any register has been written
+ * if time_limit has already passed.
+ *
+ * NOTE(review): the guest DAWR/DAWRX are loaded only when CPU_FTR_DAWR
+ * is set, but the host values are read and written back
+ * unconditionally -- confirm this asymmetry is intended.
+ */
+static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit)
+{
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       s64 hdec;
+       u64 tb, purr, spurr;
+       int trap;
+       unsigned long host_hfscr = mfspr(SPRN_HFSCR);
+       unsigned long host_ciabr = mfspr(SPRN_CIABR);
+       unsigned long host_dawr = mfspr(SPRN_DAWR);
+       unsigned long host_dawrx = mfspr(SPRN_DAWRX);
+       unsigned long host_psscr = mfspr(SPRN_PSSCR);
+       unsigned long host_pidr = mfspr(SPRN_PID);
+
+       hdec = time_limit - mftb();
+       if (hdec < 0)   /* time_limit is already in the past */
+               return BOOK3S_INTERRUPT_HV_DECREMENTER;
+       mtspr(SPRN_HDEC, hdec);
+
+       if (vc->tb_offset) {
+               u64 new_tb = mftb() + vc->tb_offset;
+               mtspr(SPRN_TBU40, new_tb);
+               tb = mftb();
+               if ((tb & 0xffffff) < (new_tb & 0xffffff))
+                       mtspr(SPRN_TBU40, new_tb + 0x1000000);  /* add 1 << 24 */
+               vc->tb_offset_applied = vc->tb_offset;
+       }
+
+       if (vc->pcr)
+               mtspr(SPRN_PCR, vc->pcr);
+       mtspr(SPRN_DPDES, vc->dpdes);
+       mtspr(SPRN_VTB, vc->vtb);
+
+       local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+       local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+       mtspr(SPRN_PURR, vcpu->arch.purr);
+       mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+       if (cpu_has_feature(CPU_FTR_DAWR)) {
+               mtspr(SPRN_DAWR, vcpu->arch.dawr);
+               mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
+       }
+       mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+       mtspr(SPRN_IC, vcpu->arch.ic);
+       mtspr(SPRN_PID, vcpu->arch.pid);
+
+       mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+             (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+       mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+       mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+       mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+       mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+       mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+       mtspr(SPRN_AMOR, ~0UL);
+
+       mtspr(SPRN_LPCR, vc->lpcr);
+       isync();
+
+       kvmppc_xive_push_vcpu(vcpu);
+
+       mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+       mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+       trap = __kvmhv_vcpu_entry_p9(vcpu);     /* enter the guest */
+
+       /* Advance host PURR/SPURR by the amount used by guest */
+       purr = mfspr(SPRN_PURR);
+       spurr = mfspr(SPRN_SPURR);
+       mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
+             purr - vcpu->arch.purr);
+       mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
+             spurr - vcpu->arch.spurr);
+       vcpu->arch.purr = purr;
+       vcpu->arch.spurr = spurr;
+
+       vcpu->arch.ic = mfspr(SPRN_IC);
+       vcpu->arch.pid = mfspr(SPRN_PID);
+       vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+
+       vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+       vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+       vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+       vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+       mtspr(SPRN_PSSCR, host_psscr);
+       mtspr(SPRN_HFSCR, host_hfscr);
+       mtspr(SPRN_CIABR, host_ciabr);
+       mtspr(SPRN_DAWR, host_dawr);
+       mtspr(SPRN_DAWRX, host_dawrx);
+       mtspr(SPRN_PID, host_pidr);
+
+       /*
+        * Since this is radix, do an eieio; tlbsync; ptesync sequence in
+        * case we interrupted the guest between a tlbie and a ptesync.
+        */
+       asm volatile("eieio; tlbsync; ptesync");
+
+       mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid);    /* restore host LPID */
+       isync();
+
+       vc->dpdes = mfspr(SPRN_DPDES);
+       vc->vtb = mfspr(SPRN_VTB);
+       mtspr(SPRN_DPDES, 0);
+       if (vc->pcr)
+               mtspr(SPRN_PCR, 0);
+
+       if (vc->tb_offset_applied) {
+               u64 new_tb = mftb() - vc->tb_offset_applied;
+               mtspr(SPRN_TBU40, new_tb);
+               tb = mftb();
+               if ((tb & 0xffffff) < (new_tb & 0xffffff))
+                       mtspr(SPRN_TBU40, new_tb + 0x1000000);  /* add 1 << 24 */
+               vc->tb_offset_applied = 0;
+       }
+
+       mtspr(SPRN_HDEC, 0x7fffffff);
+       mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+
+       return trap;
+}
+
+/*
+ * Virtual-mode guest entry for POWER9 and later when the host and
+ * guest are both using the radix MMU.  The LPIDR has already been set.
+ *
+ * Returns BOOK3S_INTERRUPT_HV_DECREMENTER straight away if the host
+ * DEC value is below 512.  Otherwise the host decrementer expiry is
+ * recorded in local_paca->kvm_hstate.dec_expires and time_limit is
+ * lowered to it if it comes first.  The host DSCR, TIDR and IAMR are
+ * kept in locals; the guest PMU, FP/VR state and the SPRs listed
+ * below are loaded, and kvmhv_load_hv_regs_and_go() is called.
+ * Afterwards the guest values are read back into the vcpu and the
+ * host state is put back.
+ *
+ * Returns the trap value from kvmhv_load_hv_regs_and_go().
+ */
+int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit)
+{
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       unsigned long host_dscr = mfspr(SPRN_DSCR);
+       unsigned long host_tidr = mfspr(SPRN_TIDR);
+       unsigned long host_iamr = mfspr(SPRN_IAMR);
+       s64 dec;
+       u64 tb;
+       int trap, save_pmu;
+
+       dec = mfspr(SPRN_DEC);
+       tb = mftb();
+       if (dec < 512)  /* host decrementer is about to expire */
+               return BOOK3S_INTERRUPT_HV_DECREMENTER;
+       local_paca->kvm_hstate.dec_expires = dec + tb;
+       if (local_paca->kvm_hstate.dec_expires < time_limit)
+               time_limit = local_paca->kvm_hstate.dec_expires;
+
+       vcpu->arch.ceded = 0;
+
+       kvmhv_save_host_pmu();          /* saves it to PACA kvm_hstate */
+
+       kvmppc_subcore_enter_guest();
+
+       vc->entry_exit_map = 1;
+       vc->in_guest = 1;
+
+       if (vcpu->arch.vpa.pinned_addr) {
+               struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+               u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+               lp->yield_count = cpu_to_be32(yield_count);
+               vcpu->arch.vpa.dirty = 1;
+       }
+
+       if (cpu_has_feature(CPU_FTR_TM) ||
+           cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+               kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+
+       kvmhv_load_guest_pmu(vcpu);
+
+       msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
+       load_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+       load_vr_state(&vcpu->arch.vr);
+#endif
+
+       mtspr(SPRN_DSCR, vcpu->arch.dscr);
+       mtspr(SPRN_IAMR, vcpu->arch.iamr);
+       mtspr(SPRN_PSPB, vcpu->arch.pspb);
+       mtspr(SPRN_FSCR, vcpu->arch.fscr);
+       mtspr(SPRN_TAR, vcpu->arch.tar);
+       mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+       mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+       mtspr(SPRN_BESCR, vcpu->arch.bescr);
+       mtspr(SPRN_WORT, vcpu->arch.wort);
+       mtspr(SPRN_TIDR, vcpu->arch.tid);
+       mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+       mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+       mtspr(SPRN_AMR, vcpu->arch.amr);
+       mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+
+       if (!(vcpu->arch.ctrl & 1))
+               mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+
+       mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
+
+       if (vcpu->arch.doorbell_request) {
+               vc->dpdes = 1;
+               smp_wmb();
+               vcpu->arch.doorbell_request = 0;
+       }
+
+       trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit);
+
+       vcpu->arch.slb_max = 0;
+       dec = mfspr(SPRN_DEC);
+       tb = mftb();
+       vcpu->arch.dec_expires = dec + tb;
+       vcpu->cpu = -1;
+       vcpu->arch.thread_cpu = -1;
+       vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+
+       vcpu->arch.iamr = mfspr(SPRN_IAMR);
+       vcpu->arch.pspb = mfspr(SPRN_PSPB);
+       vcpu->arch.fscr = mfspr(SPRN_FSCR);
+       vcpu->arch.tar = mfspr(SPRN_TAR);
+       vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+       vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+       vcpu->arch.bescr = mfspr(SPRN_BESCR);
+       vcpu->arch.wort = mfspr(SPRN_WORT);
+       vcpu->arch.tid = mfspr(SPRN_TIDR);
+       vcpu->arch.amr = mfspr(SPRN_AMR);
+       vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+       vcpu->arch.dscr = mfspr(SPRN_DSCR);
+
+       mtspr(SPRN_PSPB, 0);
+       mtspr(SPRN_WORT, 0);
+       mtspr(SPRN_AMR, 0);
+       mtspr(SPRN_UAMOR, 0);
+       mtspr(SPRN_DSCR, host_dscr);
+       mtspr(SPRN_TIDR, host_tidr);
+       mtspr(SPRN_IAMR, host_iamr);
+       mtspr(SPRN_PSPB, 0);    /* NOTE(review): PSPB already zeroed above; looks redundant */
+
+       msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
+       store_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+       store_vr_state(&vcpu->arch.vr);
+#endif
+
+       if (cpu_has_feature(CPU_FTR_TM) ||
+           cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+               kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+
+       save_pmu = 1;   /* used when no VPA is pinned */
+       if (vcpu->arch.vpa.pinned_addr) {
+               struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+               u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+               lp->yield_count = cpu_to_be32(yield_count);
+               vcpu->arch.vpa.dirty = 1;
+               save_pmu = lp->pmcregs_in_use;
+       }
+
+       kvmhv_save_guest_pmu(vcpu, save_pmu);
+
+       vc->entry_exit_map = 0x101;
+       vc->in_guest = 0;
+
+       mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+
+       kvmhv_load_host_pmu();
+
+       kvmppc_subcore_exit_guest();
+
+       return trap;
+}
+
  /*
   * Wait for some other vcpu thread to execute us, and
   * wake us up when we need to handle something in the host.
@@ -3405,6 +3672,167 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
         return vcpu->arch.ret;
  }
  
+/*
+ * Run one vcpu through the P9 entry path, kvmhv_p9_guest_entry().
+ *
+ * Registers vcpu as the only runnable thread and the runner of its
+ * vcore, makes sure the MMU is set up, enters the guest once with
+ * interrupts hard-disabled and preemption off, and then passes a
+ * non-zero trap to kvmppc_handle_exit_hv().  If the vcpu has ceded
+ * and has not been woken, it blocks in kvmppc_vcore_blocked() until
+ * it is woken or a signal is pending.
+ *
+ * Returns vcpu->arch.ret: the result of kvmppc_handle_exit_hv() (or
+ * RESUME_GUEST when there was no trap or the guest was not entered),
+ * -EINTR if a signal is pending, or the error from kvmhv_setup_mmu().
+ */
+static int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
+                                struct kvm_vcpu *vcpu, u64 time_limit)
+{
+       int trap, r, pcpu, pcpu0;
+       int srcu_idx;
+       struct kvmppc_vcore *vc;
+       struct kvm *kvm = vcpu->kvm;
+
+       trace_kvmppc_run_vcpu_enter(vcpu);
+
+       kvm_run->exit_reason = 0;
+       vcpu->arch.ret = RESUME_GUEST;
+       vcpu->arch.trap = 0;
+
+       vc = vcpu->arch.vcore;
+       vcpu->arch.ceded = 0;
+       vcpu->arch.run_task = current;
+       vcpu->arch.kvm_run = kvm_run;
+       vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
+       vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+       vcpu->arch.busy_preempt = TB_NIL;
+       vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+       vc->runnable_threads[0] = vcpu;
+       vc->n_runnable = 1;
+       vc->runner = vcpu;
+
+       /* See if the MMU is ready to go */
+       if (!kvm->arch.mmu_ready) {
+               r = kvmhv_setup_mmu(vcpu);
+               if (r) {
+                       kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+                       kvm_run->fail_entry.
+                               hardware_entry_failure_reason = 0;
+                       vcpu->arch.ret = r;
+                       /*
+                        * Preemption and interrupts have not been
+                        * disabled yet, so skip the "out" label (which
+                        * re-enables both) and go straight to "done".
+                        */
+                       goto done;
+               }
+       }
+
+       if (need_resched())
+               cond_resched();
+
+       kvmppc_update_vpas(vcpu);
+
+       init_vcore_to_run(vc);
+       vc->preempt_tb = TB_NIL;
+
+       preempt_disable();
+       pcpu = smp_processor_id();
+       vc->pcpu = pcpu;
+       kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+
+       local_irq_disable();
+       hard_irq_disable();
+       /* From here on, bail-outs must undo the irq/preempt disables */
+       if (signal_pending(current))
+               goto sigpend;
+       if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
+               goto out;
+
+       kvmppc_core_prepare_to_enter(vcpu);
+
+       kvmppc_clear_host_core(pcpu);
+
+       local_paca->kvm_hstate.tid = 0;
+       local_paca->kvm_hstate.napping = 0;
+       local_paca->kvm_hstate.kvm_split_mode = NULL;
+       kvmppc_start_thread(vcpu, vc);
+       kvmppc_create_dtl_entry(vcpu, vc);
+       trace_kvm_guest_enter(vcpu);
+
+       vc->vcore_state = VCORE_RUNNING;
+       trace_kvmppc_run_core(vc, 0);
+
+       mtspr(SPRN_LPID, vc->kvm->arch.lpid);
+       isync();
+
+       /* See comment above in kvmppc_run_core() about this */
+       pcpu0 = pcpu;
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               pcpu0 &= ~0x3UL;
+
+       if (cpumask_test_cpu(pcpu0, &kvm->arch.need_tlb_flush)) {
+               radix__local_flush_tlb_lpid_guest(kvm->arch.lpid);
+               /* Clear the bit after the TLB flush */
+               cpumask_clear_cpu(pcpu0, &kvm->arch.need_tlb_flush);
+       }
+
+       trace_hardirqs_on();
+       guest_enter_irqoff();
+
+       srcu_idx = srcu_read_lock(&kvm->srcu);
+
+       this_cpu_disable_ftrace();
+
+       trap = kvmhv_p9_guest_entry(vcpu, time_limit);
+       vcpu->arch.trap = trap;
+
+       this_cpu_enable_ftrace();
+
+       srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+       mtspr(SPRN_LPID, kvm->arch.host_lpid);
+       isync();
+
+       trace_hardirqs_off();
+       set_irq_happened(trap);
+
+       kvmppc_set_host_core(pcpu);
+
+       local_irq_enable();
+       guest_exit();
+
+       cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
+
+       preempt_enable();
+
+       /* cancel pending decrementer exception if DEC is now positive */
+       if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
+               kvmppc_core_dequeue_dec(vcpu);
+
+       trace_kvm_guest_exit(vcpu);
+       r = RESUME_GUEST;
+       if (trap)
+               r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
+       vcpu->arch.ret = r;
+
+       /* Guest ceded and nothing has woken it yet: block until it is */
+       if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
+           !kvmppc_vcpu_woken(vcpu)) {
+               kvmppc_set_timer(vcpu);
+               while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
+                       if (signal_pending(current)) {
+                               vcpu->stat.signal_exits++;
+                               kvm_run->exit_reason = KVM_EXIT_INTR;
+                               vcpu->arch.ret = -EINTR;
+                               break;
+                       }
+                       spin_lock(&vc->lock);
+                       kvmppc_vcore_blocked(vc);
+                       spin_unlock(&vc->lock);
+               }
+       }
+       vcpu->arch.ceded = 0;
+
+       vc->vcore_state = VCORE_INACTIVE;
+       trace_kvmppc_run_core(vc, 1);
+
+ done:
+       kvmppc_remove_runnable(vc, vcpu);
+       trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
+
+       return vcpu->arch.ret;
+
+ sigpend:
+       vcpu->stat.signal_exits++;
+       kvm_run->exit_reason = KVM_EXIT_INTR;
+       vcpu->arch.ret = -EINTR;
+ out:
+       /* Only reached after local_irq_disable() and preempt_disable() */
+       local_irq_enable();
+       preempt_enable();
+       goto done;
+}
+
  static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
  {
         int r;
@@ -3480,7 +3908,10 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
         vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
  
         do {
-               r = kvmppc_run_vcpu(run, vcpu);
+               if (kvm->arch.threads_indep && kvm_is_radix(kvm))
+                       r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0);
+               else
+                       r = kvmppc_run_vcpu(run, vcpu);
  
                 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
                     !(vcpu->arch.shregs.msr & MSR_PR)) {
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c

index ee564b682f0c4e21e77e88696a7fda35cc6e27a9..0787f12c1a1bef3e22e10ed3aafe25f842a2b838 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
  
         local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
  }
+EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
  
  void kvmppc_subcore_exit_guest(void)
  {
@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
  
         local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
  }
+EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
  
  static bool kvmppc_tb_resync_required(void)
  {
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 45dd637d6b44b6133ed87a310b00d76f072aa05b..ea84696b0a3cbc3d6b181700a3ba27ca6257e647 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -47,8 +47,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
  #define NAPPING_NOVCPU 2
  
  /* Stack frame offsets for kvmppc_hv_entry */
-#define SFS                    160
+#define SFS                    208
  #define STACK_SLOT_TRAP                (SFS-4)
+#define STACK_SLOT_SHORT_PATH  (SFS-8)
  #define STACK_SLOT_TID         (SFS-16)
  #define STACK_SLOT_PSSCR       (SFS-24)
  #define STACK_SLOT_PID         (SFS-32)
@@ -57,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
  #define STACK_SLOT_DAWR                (SFS-56)
  #define STACK_SLOT_DAWRX       (SFS-64)
  #define STACK_SLOT_HFSCR       (SFS-72)
+/* the following is used by the P9 short path */
+#define STACK_SLOT_NVGPRS      (SFS-152)       /* 18 gprs */
  
  /*
   * Call kvmppc_hv_entry in real mode.
@@ -1020,6 +1023,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
  no_xive:
  #endif /* CONFIG_KVM_XICS */
  
+       li      r0, 0
+       stw     r0, STACK_SLOT_SHORT_PATH(r1)
+
  deliver_guest_interrupt:       /* r4 = vcpu, r13 = paca */
         /* Check if we can deliver an external or decrementer interrupt now */
         ld      r0, VCPU_PENDING_EXC(r4)
@@ -1034,13 +1040,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         bl      kvmppc_guest_entry_inject_int
         ld      r4, HSTATE_KVM_VCPU(r13)
  71:
-       ld      r10, VCPU_PC(r4)
-       ld      r11, VCPU_MSR(r4)
         ld      r6, VCPU_SRR0(r4)
         ld      r7, VCPU_SRR1(r4)
         mtspr   SPRN_SRR0, r6
         mtspr   SPRN_SRR1, r7
  
+fast_guest_entry_c:
+       ld      r10, VCPU_PC(r4)
+       ld      r11, VCPU_MSR(r4)
         /* r11 = vcpu->arch.msr & ~MSR_HV */
         rldicl  r11, r11, 63 - MSR_HV_LG, 1
         rotldi  r11, r11, 1 + MSR_HV_LG
@@ -1117,6 +1124,83 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         HRFI_TO_GUEST
         b       .
  
+/*
+ * Enter the guest on a P9 or later system where we have exactly
+ * one vcpu per vcore and we don't need to go to real mode
+ * (which implies that host and guest are both using radix MMU mode).
+ * r3 = vcpu pointer
+ * Most SPRs and all the VSRs have been loaded already.
+ *
+ * Allocates an SFS-byte stack frame, sets STACK_SLOT_SHORT_PATH to 1,
+ * saves CR, the host r14-r31 (at STACK_SLOT_NVGPRS) and the host MSR
+ * (in HSTATE_HOST_MSR), loads the guest r14-r31 from the vcpu and
+ * branches to fast_guest_entry_c with r4 = vcpu.
+ *
+ * guest_exit_short_path is the return leg, reached with r9 = vcpu and
+ * r12 = trap.  It stores the guest r14-r31 back into the vcpu, reloads
+ * the host r14-r31, CR and LR, pops the frame and returns the trap
+ * number in r3.
+ */
+_GLOBAL(__kvmhv_vcpu_entry_p9)
+EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
+       mflr    r0
+       std     r0, PPC_LR_STKOFF(r1)
+       stdu    r1, -SFS(r1)
+
+       li      r0, 1
+       stw     r0, STACK_SLOT_SHORT_PATH(r1)
+
+       std     r3, HSTATE_KVM_VCPU(r13)
+       mfcr    r4
+       stw     r4, SFS+8(r1)
+
+       std     r1, HSTATE_HOST_R1(r13)
+
+       reg = 14
+       .rept   18
+       std     reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+       reg = reg + 1
+       .endr
+
+       reg = 14
+       .rept   18
+       ld      reg, __VCPU_GPR(reg)(r3)
+       reg = reg + 1
+       .endr
+
+       mfmsr   r10
+       std     r10, HSTATE_HOST_MSR(r13)
+
+       mr      r4, r3
+       b       fast_guest_entry_c
+guest_exit_short_path:
+
+       li      r0, KVM_GUEST_MODE_NONE
+       stb     r0, HSTATE_IN_GUEST(r13)
+
+       reg = 14
+       .rept   18
+       std     reg, __VCPU_GPR(reg)(r9)
+       reg = reg + 1
+       .endr
+
+       reg = 14
+       .rept   18
+       ld      reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+       reg = reg + 1
+       .endr
+
+       lwz     r4, SFS+8(r1)
+       mtcr    r4
+
+       mr      r3, r12         /* trap number */
+
+       addi    r1, r1, SFS
+       ld      r0, PPC_LR_STKOFF(r1)
+       mtlr    r0
+
+       /*
+        * If MSR_IR is set, return directly with blr; otherwise we
+        * are in real mode, so do a rfid to get back to the caller.
+        */
+       mfmsr   r4
+       andi.   r5, r4, MSR_IR
+       bnelr
+       rldicl  r5, r4, 64 - MSR_TS_S_LG, 62    /* extract TS field */
+       mtspr   SPRN_SRR0, r0
+       ld      r10, HSTATE_HOST_MSR(r13)
+       rldimi  r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+       mtspr   SPRN_SRR1, r10
+       RFI_TO_KERNEL
+       b       .
+
  secondary_too_late:
         li      r12, 0
         stw     r12, STACK_SLOT_TRAP(r1)
@@ -1377,6 +1461,11 @@ guest_exit_cont:         /* r9 = vcpu, r12 = trap, r13 = paca */
  1:
  #endif /* CONFIG_KVM_XICS */
  
+       /* If we came in through the P9 short path, go back out to C now */
+       lwz     r0, STACK_SLOT_SHORT_PATH(r1)
+       cmpwi   r0, 0
+       bne     guest_exit_short_path
+
         /* For hash guest, read the guest SLB and save it away */
         ld      r5, VCPU_KVM(r9)
         lbz     r0, KVM_RADIX(r5)
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c

index 30c2eb7669549a1076b1ce9776c55a8babfc7b21..ad4a370703d347b060b89a458d1d4b35e87ca5bd 100644 (file)
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -61,6 +61,69 @@
   */
  #define XIVE_Q_GAP     2
  
+/*
+ * Push a vcpu's context to the XIVE on guest entry.
+ * This assumes we are in virtual mode (MMU on).
+ *
+ * Does nothing if local_paca->kvm_hstate.xive_tima_virt is NULL.
+ * Otherwise writes the vcpu's saved w01 state and CAM word to the
+ * TM_QW1_OS area of the TIMA, marks the vcpu as pushed, clears
+ * irq_pending and, if the escalation interrupt is on, masks it.
+ */
+void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
+{
+       void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+       u64 pq;
+
+       if (!tima)
+               return;
+       eieio();
+       __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+       __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
+       vcpu->arch.xive_pushed = 1;
+       eieio();
+
+       /*
+        * We clear the irq_pending flag. There is a small chance of a
+        * race vs. the escalation interrupt happening on another
+        * processor setting it again, but the only consequence is to
+        * cause a spurious wakeup on the next H_CEDE, which is not an
+        * issue.
+        */
+       vcpu->arch.irq_pending = 0;
+
+       /*
+        * In single escalation mode, if the escalation interrupt is
+        * on, we mask it.
+        */
+       if (vcpu->arch.xive_esc_on) {
+               pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+                                                 XIVE_ESB_SET_PQ_01));
+               mb();
+
+               /*
+                * We have a possible subtle race here: The escalation
+                * interrupt might have fired and be on its way to the
+                * host queue while we mask it, and if we unmask it
+                * early enough (re-cede right away), there is a
+                * theoretical possibility that it fires again, thus
+                * landing in the target queue more than once which is
+                * a big no-no.
+                *
+                * Fortunately, solving this is rather easy. If the
+                * above load setting PQ to 01 returns a previous
+                * value where P is set, then we know the escalation
+                * interrupt is somewhere on its way to the host. In
+                * that case we simply don't clear the xive_esc_on
+                * flag below. It will be eventually cleared by the
+                * handler for the escalation interrupt.
+                *
+                * Then, when doing a cede, we check that flag again
+                * before re-enabling the escalation interrupt, and if
+                * set, we abort the cede.
+                */
+               if (!(pq & XIVE_ESB_VAL_P))
+                       /* Now P is 0, we can clear the flag */
+                       vcpu->arch.xive_esc_on = 0;
+       }
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
+
  /*
   * This is a simple trigger for a generic XIVE IRQ. This must
   * only be called for interrupts that support a trigger page
author	Paul Mackerras <paulus@ozlabs.org>
	Mon, 8 Oct 2018 05:30:55 +0000 (16:30 +1100)
committer	Michael Ellerman <mpe@ellerman.id.au>
	Tue, 9 Oct 2018 05:04:27 +0000 (16:04 +1100)
arch/powerpc/include/asm/asm-prototypes.h		patch \| blob \| history
arch/powerpc/include/asm/kvm_ppc.h		patch \| blob \| history
arch/powerpc/kvm/book3s_hv.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_ras.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv_rmhandlers.S		patch \| blob \| history
arch/powerpc/kvm/book3s_xive.c		patch \| blob \| history