KVM: PPC: Book3S HV: XIVE: Introduce a new capability KVM_CAP_PPC_IRQ_XIVE
authorCédric Le Goater <clg@kaod.org>
Thu, 18 Apr 2019 10:39:28 +0000 (12:39 +0200)
committerPaul Mackerras <paulus@ozlabs.org>
Tue, 30 Apr 2019 09:35:16 +0000 (19:35 +1000)
The user interface exposes a new capability KVM_CAP_PPC_IRQ_XIVE to
let QEMU connect the vCPU presenters to the XIVE KVM device if
required. The capability is not advertised for now as the full support
for the XIVE native exploitation mode is not yet available. When this
is case, the capability will be advertised on PowerNV Hypervisors
only. Nested guests (pseries KVM Hypervisor) are not supported.

Internally, the interface to the new KVM device is protected with a
new interrupt mode: KVMPPC_IRQ_XIVE.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Documentation/virtual/kvm/api.txt
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/kvm/book3s_xive.h
arch/powerpc/kvm/book3s_xive_native.c
arch/powerpc/kvm/powerpc.c
include/uapi/linux/kvm.h

index 67068c47c591a5ce8fc373313d46f434863ef54b..e38eb17b7be60f4f6de16f5d08855fbe507a4655 100644 (file)
@@ -4504,6 +4504,15 @@ struct kvm_sync_regs {
         struct kvm_vcpu_events events;
 };
 
+6.75 KVM_CAP_PPC_IRQ_XIVE
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the XIVE device fd
+            args[1] is the XIVE CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XIVE device.
+
 7. Capabilities that can be enabled on VMs
 ------------------------------------------
 
index 9074f1d7613cc15fe2a8954fe2c1c0d7b3f74f9a..eac25fd7e631ac16f2683a7deca4c095d9a28a2f 100644 (file)
@@ -453,6 +453,7 @@ struct kvmppc_passthru_irqmap {
 #define KVMPPC_IRQ_DEFAULT     0
 #define KVMPPC_IRQ_MPIC                1
 #define KVMPPC_IRQ_XICS                2 /* Includes a XIVE option */
+#define KVMPPC_IRQ_XIVE                3 /* XIVE native exploitation mode */
 
 #define MMIO_HPTE_CACHE_SIZE   4
 
index f746d85f36c7398fa2b79eb7e0edc773bc7a5109..9fc2753e516e3224c5a2549b45edbbee4bbfc02f 100644 (file)
@@ -594,6 +594,14 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
                               int level, bool line_status);
 extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
 
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
+}
+
+extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+                                          struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
 extern void kvmppc_xive_native_init_module(void);
 extern void kvmppc_xive_native_exit_module(void);
 
@@ -621,6 +629,11 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
                                      int level, bool line_status) { return -ENODEV; }
 static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
 
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+       { return 0; }
+static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+                         struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
 static inline void kvmppc_xive_native_init_module(void) { }
 static inline void kvmppc_xive_native_exit_module(void) { }
 
index f78d002f0fe0dce6cc311ae1322dc17c42b7bdd5..e7f1ada1c3decab1fdd928d18b2e466ab8e28d26 100644 (file)
@@ -380,11 +380,6 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
        return -EBUSY;
 }
 
-static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
-{
-       return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
-}
-
 static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
                             struct kvmppc_xive_src_block *sb,
                             struct kvmppc_xive_irq_state *state)
@@ -430,8 +425,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
         */
        if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
                xive_native_configure_irq(hw_num,
-                                         xive_vp(xive, state->act_server),
-                                         MASKED, state->number);
+                               kvmppc_xive_vp(xive, state->act_server),
+                               MASKED, state->number);
                /* set old_p so we can track if an H_EOI was done */
                state->old_p = true;
                state->old_q = false;
@@ -486,8 +481,8 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
         */
        if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
                xive_native_configure_irq(hw_num,
-                                         xive_vp(xive, state->act_server),
-                                         state->act_priority, state->number);
+                               kvmppc_xive_vp(xive, state->act_server),
+                               state->act_priority, state->number);
                /* If an EOI is needed, do it here */
                if (!state->old_p)
                        xive_vm_source_eoi(hw_num, xd);
@@ -563,7 +558,7 @@ static int xive_target_interrupt(struct kvm *kvm,
        kvmppc_xive_select_irq(state, &hw_num, NULL);
 
        return xive_native_configure_irq(hw_num,
-                                        xive_vp(xive, server),
+                                        kvmppc_xive_vp(xive, server),
                                         prio, state->number);
 }
 
@@ -951,7 +946,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
         * which is fine for a never started interrupt.
         */
        xive_native_configure_irq(hw_irq,
-                                 xive_vp(xive, state->act_server),
+                                 kvmppc_xive_vp(xive, state->act_server),
                                  state->act_priority, state->number);
 
        /*
@@ -1027,7 +1022,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
 
        /* Reconfigure the IPI */
        xive_native_configure_irq(state->ipi_number,
-                                 xive_vp(xive, state->act_server),
+                                 kvmppc_xive_vp(xive, state->act_server),
                                  state->act_priority, state->number);
 
        /*
@@ -1049,7 +1044,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
 }
 EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
 
-static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct kvm *kvm = vcpu->kvm;
@@ -1166,7 +1161,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
        xc->xive = xive;
        xc->vcpu = vcpu;
        xc->server_num = cpu;
-       xc->vp_id = xive_vp(xive, cpu);
+       xc->vp_id = kvmppc_xive_vp(xive, cpu);
        xc->mfrr = 0xff;
        xc->valid = true;
 
@@ -1883,6 +1878,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
        return 0;
 }
 
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       unsigned int i;
+
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+               struct xive_q *q = &xc->queues[i];
+               u32 i0, i1, idx;
+
+               if (!q->qpage && !xc->esc_virq[i])
+                       continue;
+
+               seq_printf(m, " [q%d]: ", i);
+
+               if (q->qpage) {
+                       idx = q->idx;
+                       i0 = be32_to_cpup(q->qpage + idx);
+                       idx = (idx + 1) & q->msk;
+                       i1 = be32_to_cpup(q->qpage + idx);
+                       seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
+                                  i0, i1);
+               }
+               if (xc->esc_virq[i]) {
+                       struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+                       struct xive_irq_data *xd =
+                               irq_data_get_irq_handler_data(d);
+                       u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+                       seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+                                  (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+                                  (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+                                  xc->esc_virq[i], pq, xd->eoi_page);
+                       seq_puts(m, "\n");
+               }
+       }
+       return 0;
+}
 
 static int xive_debug_show(struct seq_file *m, void *private)
 {
@@ -1908,7 +1940,6 @@ static int xive_debug_show(struct seq_file *m, void *private)
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-               unsigned int i;
 
                if (!xc)
                        continue;
@@ -1918,33 +1949,8 @@ static int xive_debug_show(struct seq_file *m, void *private)
                           xc->server_num, xc->cppr, xc->hw_cppr,
                           xc->mfrr, xc->pending,
                           xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
-               for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
-                       struct xive_q *q = &xc->queues[i];
-                       u32 i0, i1, idx;
 
-                       if (!q->qpage && !xc->esc_virq[i])
-                               continue;
-
-                       seq_printf(m, " [q%d]: ", i);
-
-                       if (q->qpage) {
-                               idx = q->idx;
-                               i0 = be32_to_cpup(q->qpage + idx);
-                               idx = (idx + 1) & q->msk;
-                               i1 = be32_to_cpup(q->qpage + idx);
-                               seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
-                       }
-                       if (xc->esc_virq[i]) {
-                               struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
-                               struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
-                               u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
-                               seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
-                                          (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
-                                          (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
-                                          xc->esc_virq[i], pq, xd->eoi_page);
-                               seq_printf(m, "\n");
-                       }
-               }
+               kvmppc_xive_debug_show_queues(m, vcpu);
 
                t_rm_h_xirr += xc->stat_rm_h_xirr;
                t_rm_h_ipoll += xc->stat_rm_h_ipoll;
index a08ae6fd4c51fc54b9c79ffe48290b7f86b58956..d366df69b9cbacd4eab27ec066399c921f3fc9e8 100644 (file)
@@ -198,6 +198,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
        return xive->src_blocks[bid];
 }
 
+static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
+{
+       return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
+}
+
 /*
  * Mapping between guest priorities and host priorities
  * is as follow.
@@ -248,5 +253,11 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
 extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
 extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
 
+/*
+ * Common Xive routines for XICS-over-XIVE and XIVE native
+ */
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu);
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu);
+
 #endif /* CONFIG_KVM_XICS */
 #endif /* _KVM_PPC_BOOK3S_XICS_H */
index 751259394150da3a8dfec4e422e3ee4573402127..6fa73cfd9d9c7b87a849cfaf75a04956c0121c3c 100644 (file)
 
 #include "book3s_xive.h"
 
+static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
+{
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       struct xive_q *q = &xc->queues[prio];
+
+       xive_native_disable_queue(xc->vp_id, q, prio);
+       if (q->qpage) {
+               put_page(virt_to_page(q->qpage));
+               q->qpage = NULL;
+       }
+}
+
+void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+       int i;
+
+       if (!kvmppc_xive_enabled(vcpu))
+               return;
+
+       if (!xc)
+               return;
+
+       pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+       /* Ensure no interrupt is still routed to that VP */
+       xc->valid = false;
+       kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+       /* Disable the VP */
+       xive_native_disable_vp(xc->vp_id);
+
+       /* Free the queues & associated interrupts */
+       for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+               /* Free the escalation irq */
+               if (xc->esc_virq[i]) {
+                       free_irq(xc->esc_virq[i], vcpu);
+                       irq_dispose_mapping(xc->esc_virq[i]);
+                       kfree(xc->esc_virq_names[i]);
+                       xc->esc_virq[i] = 0;
+               }
+
+               /* Free the queue */
+               kvmppc_xive_native_cleanup_queue(vcpu, i);
+       }
+
+       /* Free the VP */
+       kfree(xc);
+
+       /* Cleanup the vcpu */
+       vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+       vcpu->arch.xive_vcpu = NULL;
+}
+
+int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+                                   struct kvm_vcpu *vcpu, u32 server_num)
+{
+       struct kvmppc_xive *xive = dev->private;
+       struct kvmppc_xive_vcpu *xc = NULL;
+       int rc;
+
+       pr_devel("native_connect_vcpu(server=%d)\n", server_num);
+
+       if (dev->ops != &kvm_xive_native_ops) {
+               pr_devel("Wrong ops !\n");
+               return -EPERM;
+       }
+       if (xive->kvm != vcpu->kvm)
+               return -EPERM;
+       if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+               return -EBUSY;
+       if (server_num >= KVM_MAX_VCPUS) {
+               pr_devel("Out of bounds !\n");
+               return -EINVAL;
+       }
+
+       mutex_lock(&vcpu->kvm->lock);
+
+       if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
+               pr_devel("Duplicate !\n");
+               rc = -EEXIST;
+               goto bail;
+       }
+
+       xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+       if (!xc) {
+               rc = -ENOMEM;
+               goto bail;
+       }
+
+       vcpu->arch.xive_vcpu = xc;
+       xc->xive = xive;
+       xc->vcpu = vcpu;
+       xc->server_num = server_num;
+
+       xc->vp_id = kvmppc_xive_vp(xive, server_num);
+       xc->valid = true;
+       vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
+
+       rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+       if (rc) {
+               pr_err("Failed to get VP info from OPAL: %d\n", rc);
+               goto bail;
+       }
+
+       /*
+        * Enable the VP first as the single escalation mode will
+        * affect escalation interrupts numbering
+        */
+       rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+       if (rc) {
+               pr_err("Failed to enable VP in OPAL: %d\n", rc);
+               goto bail;
+       }
+
+       /* Configure VCPU fields for use by assembly push/pull */
+       vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+       vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+       /* TODO: reset all queues to a clean state ? */
+bail:
+       mutex_unlock(&vcpu->kvm->lock);
+       if (rc)
+               kvmppc_xive_native_cleanup_vcpu(vcpu);
+
+       return rc;
+}
+
 static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
 {
@@ -114,10 +242,32 @@ static int xive_native_debug_show(struct seq_file *m, void *private)
 {
        struct kvmppc_xive *xive = m->private;
        struct kvm *kvm = xive->kvm;
+       struct kvm_vcpu *vcpu;
+       unsigned int i;
 
        if (!kvm)
                return 0;
 
+       seq_puts(m, "=========\nVCPU state\n=========\n");
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+               if (!xc)
+                       continue;
+
+               seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+                          xc->server_num,
+                          vcpu->arch.xive_saved_state.nsr,
+                          vcpu->arch.xive_saved_state.cppr,
+                          vcpu->arch.xive_saved_state.ipb,
+                          vcpu->arch.xive_saved_state.pipr,
+                          vcpu->arch.xive_saved_state.w01,
+                          (u32) vcpu->arch.xive_cam_word);
+
+               kvmppc_xive_debug_show_queues(m, vcpu);
+       }
+
        return 0;
 }
 
index 8885377ec3e0c611b3ec3f14b8565e2f7ffde4aa..b0858ee6146046ed4a51a71d4835d4509c064981 100644 (file)
@@ -570,6 +570,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_PPC_GET_CPU_CHAR:
                r = 1;
                break;
+#ifdef CONFIG_KVM_XIVE
+       case KVM_CAP_PPC_IRQ_XIVE:
+               /*
+                * Return false until all the XIVE infrastructure is
+                * in place including support for migration.
+                */
+               r = 0;
+               break;
+#endif
 
        case KVM_CAP_PPC_ALLOC_HTAB:
                r = hv_enabled;
@@ -753,6 +762,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
                else
                        kvmppc_xics_free_icp(vcpu);
                break;
+       case KVMPPC_IRQ_XIVE:
+               kvmppc_xive_native_cleanup_vcpu(vcpu);
+               break;
        }
 
        kvmppc_core_vcpu_free(vcpu);
@@ -1941,6 +1953,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                break;
        }
 #endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+       case KVM_CAP_PPC_IRQ_XIVE: {
+               struct fd f;
+               struct kvm_device *dev;
+
+               r = -EBADF;
+               f = fdget(cap->args[0]);
+               if (!f.file)
+                       break;
+
+               r = -ENXIO;
+               if (!xive_enabled())
+                       break;
+
+               r = -EPERM;
+               dev = kvm_device_from_filp(f.file);
+               if (dev)
+                       r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
+                                                           cap->args[1]);
+
+               fdput(f);
+               break;
+       }
+#endif /* CONFIG_KVM_XIVE */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        case KVM_CAP_PPC_FWNMI:
                r = -EINVAL;
index e6368163d3a0d80fa587f2ded966a5785a412b82..52bf74a1616edc7430b5bc6c58a841000c474b56 100644 (file)
@@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_VM_IPA_SIZE 165
 #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
 #define KVM_CAP_HYPERV_CPUID 167
+#define KVM_CAP_PPC_IRQ_XIVE 168
 
 #ifdef KVM_CAP_IRQ_ROUTING