KVM: PPC: Book3S: Simplify external interrupt handling
author: Paul Mackerras <paulus@ozlabs.org>
Mon, 8 Oct 2018 05:30:48 +0000 (16:30 +1100)
committer: Michael Ellerman <mpe@ellerman.id.au>
Tue, 9 Oct 2018 05:04:27 +0000 (16:04 +1100)
Currently we use two bits in the vcpu pending_exceptions bitmap to
indicate that an external interrupt is pending for the guest, one
for "one-shot" interrupts that are cleared when delivered, and one
for interrupts that persist until cleared by an explicit action of
the OS (e.g. an acknowledge to an interrupt controller).  The
BOOK3S_IRQPRIO_EXTERNAL bit is used for one-shot interrupt requests
and BOOK3S_IRQPRIO_EXTERNAL_LEVEL is used for persisting interrupts.

In practice BOOK3S_IRQPRIO_EXTERNAL never gets used, because our
Book3S platforms generally, and pseries in particular, expect
external interrupt requests to persist until they are acknowledged
at the interrupt controller.  That, combined with the confusion
introduced by having two bits for what is essentially the same thing,
makes it attractive to simplify things by using only one bit.  This
patch does that.

With this patch there is only BOOK3S_IRQPRIO_EXTERNAL, and by default
it has the semantics of a persisting interrupt.  In order to avoid
breaking the ABI, we introduce a new "external_oneshot" flag which
preserves the behaviour of the KVM_INTERRUPT ioctl with the
KVM_INTERRUPT_SET argument.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/kvm_asm.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_hv_rm_xics.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/kvm/book3s_xive_template.c
arch/powerpc/kvm/trace_book3s.h
tools/perf/arch/powerpc/util/book3s_hv_exits.h

index a790d5cf6ea37da3bbb99e879cc59753757f1921..1f321914676d76aee079cff83a791890980e8c69 100644 (file)
@@ -84,7 +84,6 @@
 #define BOOK3S_INTERRUPT_INST_STORAGE  0x400
 #define BOOK3S_INTERRUPT_INST_SEGMENT  0x480
 #define BOOK3S_INTERRUPT_EXTERNAL      0x500
-#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL        0x501
 #define BOOK3S_INTERRUPT_EXTERNAL_HV   0x502
 #define BOOK3S_INTERRUPT_ALIGNMENT     0x600
 #define BOOK3S_INTERRUPT_PROGRAM       0x700
 #define BOOK3S_IRQPRIO_EXTERNAL                        14
 #define BOOK3S_IRQPRIO_DECREMENTER             15
 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR     16
-#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL          17
-#define BOOK3S_IRQPRIO_MAX                     18
+#define BOOK3S_IRQPRIO_MAX                     17
 
 #define BOOK3S_HFLAG_DCBZ32                    0x1
 #define BOOK3S_HFLAG_SLB                       0x2
index 906bcbdfd2a1be56771d30d8e19845e102e36c94..3cd0b9f45c2a888eb6babed564f6fd88ff3cbf33 100644 (file)
@@ -707,6 +707,7 @@ struct kvm_vcpu_arch {
        u8 hcall_needed;
        u8 epr_flags; /* KVMPPC_EPR_xxx */
        u8 epr_needed;
+       u8 external_oneshot;    /* clear external irq after delivery */
 
        u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
 
index 87348e498c89e917d4b2066074054fedb2d5cc18..66a55218e8dd0a6dd8edeb635bf608f0bd381204 100644 (file)
@@ -150,7 +150,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
        case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE;         break;
        case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT;         break;
        case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL;             break;
-       case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL;       break;
        case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT;            break;
        case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM;              break;
        case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL;           break;
@@ -236,18 +235,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
-       unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
-
-       if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
-               vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+       /*
+        * This case (KVM_INTERRUPT_SET) should never actually arise for
+        * a pseries guest (because pseries guests expect their interrupt
+        * controllers to continue asserting an external interrupt request
+        * until it is acknowledged at the interrupt controller), but is
+        * included to avoid ABI breakage and potentially for other
+        * sorts of guest.
+        *
+        * There is a subtlety here: HV KVM does not test the
+        * external_oneshot flag in the code that synthesizes
+        * external interrupts for the guest just before entering
+        * the guest.  That is OK even if userspace did do a
+        * KVM_INTERRUPT_SET on a pseries guest vcpu, because the
+        * caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
+        * which ends up doing a smp_send_reschedule(), which will
+        * pull the guest all the way out to the host, meaning that
+        * we will call kvmppc_core_prepare_to_enter() before entering
+        * the guest again, and that will handle the external_oneshot
+        * flag correctly.
+        */
+       if (irq->irq == KVM_INTERRUPT_SET)
+               vcpu->arch.external_oneshot = 1;
 
-       kvmppc_book3s_queue_irqprio(vcpu, vec);
+       kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 }
 
 void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
        kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
-       kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
 }
 
 void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
@@ -278,7 +294,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
                vec = BOOK3S_INTERRUPT_DECREMENTER;
                break;
        case BOOK3S_IRQPRIO_EXTERNAL:
-       case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
                deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
                vec = BOOK3S_INTERRUPT_EXTERNAL;
                break;
@@ -352,8 +367,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
                case BOOK3S_IRQPRIO_DECREMENTER:
                        /* DEC interrupts get cleared by mtdec */
                        return false;
-               case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
-                       /* External interrupts get cleared by userspace */
+               case BOOK3S_IRQPRIO_EXTERNAL:
+                       /*
+                        * External interrupts get cleared by userspace
+                        * except when set by the KVM_INTERRUPT ioctl with
+                        * KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
+                        */
+                       if (vcpu->arch.external_oneshot) {
+                               vcpu->arch.external_oneshot = 0;
+                               return true;
+                       }
                        return false;
        }
 
index 758d1d23215e94b2feb25cf9a53fd778a4785f44..8b9f356896485f5bc9a50be052aab9ac6af05144 100644 (file)
@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 
        /* Mark the target VCPU as having an interrupt pending */
        vcpu->stat.queue_intr++;
-       set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+       set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
 
        /* Kick self ? Just set MER and return */
        if (vcpu == this_vcpu) {
@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
 {
        /* Note: Only called on self ! */
-       clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
-                 &vcpu->arch.pending_exceptions);
+       clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
        mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
 }
 
index 1d14046124a01afffda02a70d73e7666b7b4b6bc..77960e68f7b05dc70385d643f4cac51fce5252e5 100644 (file)
@@ -1122,11 +1122,11 @@ kvmppc_cede_reentry:            /* r4 = vcpu, r13 = paca */
 
        /* Check if we can deliver an external or decrementer interrupt now */
        ld      r0, VCPU_PENDING_EXC(r4)
-       rldicl  r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
+       rldicl  r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL, 63
        cmpdi   cr1, r0, 0
        andi.   r8, r11, MSR_EE
        mfspr   r8, SPRN_LPCR
-       /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
+       /* Insert EXTERNAL bit into LPCR at the MER bit position */
        rldimi  r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
        mtspr   SPRN_LPCR, r8
        isync
index 614ebb4261f76593bb07f52f2fd0a2db7307d4fe..059683e4e67a2ed6b1601dccb1dc9ab6a14460a8 100644 (file)
@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_EXTERNAL:
-       case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
        case BOOK3S_INTERRUPT_EXTERNAL_HV:
        case BOOK3S_INTERRUPT_H_VIRT:
                vcpu->stat.ext_intr_exits++;
index b8356cdc0c043d2407b6cbbe172deec8b56f5dbf..d9ba1b06d0f5d396afec4280d9a1fb3ae4c48c58 100644 (file)
@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
         */
        if (new.out_ee) {
                kvmppc_book3s_queue_irqprio(icp->vcpu,
-                                           BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+                                           BOOK3S_INTERRUPT_EXTERNAL);
                if (!change_self)
                        kvmppc_fast_vcpu_kick(icp->vcpu);
        }
@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
        u32 xirr;
 
        /* First, remove EE from the processor */
-       kvmppc_book3s_dequeue_irqprio(icp->vcpu,
-                                     BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+       kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 
        /*
         * ICP State: Accept_Interrupt
@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
         * We can remove EE from the current processor, the update
         * transaction will set it again if needed
         */
-       kvmppc_book3s_dequeue_irqprio(icp->vcpu,
-                                     BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+       kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 
        do {
                old_state = new_state = READ_ONCE(icp->state);
@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
         * Deassert the CPU interrupt request.
         * icp_try_update will reassert it if necessary.
         */
-       kvmppc_book3s_dequeue_irqprio(icp->vcpu,
-                                     BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+       kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 
        /*
         * Note that if we displace an interrupt from old_state.xisr,
index 4171ede8722be6c424edcd5dc57485f22322dfd7..203ea654c81eed834b011ad8a9a63a623c023574 100644 (file)
@@ -285,7 +285,7 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
         * set by pull or an escalation interrupts).
         */
        if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
-               clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+               clear_bit(BOOK3S_IRQPRIO_EXTERNAL,
                          &vcpu->arch.pending_exceptions);
 
        pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
index f3b23759e0172f7643ce849b186ec2b19ca03d3d..372a82fa2de378952c762a8225fe73c7f949f8cc 100644 (file)
@@ -14,7 +14,6 @@
        {0x400, "INST_STORAGE"}, \
        {0x480, "INST_SEGMENT"}, \
        {0x500, "EXTERNAL"}, \
-       {0x501, "EXTERNAL_LEVEL"}, \
        {0x502, "EXTERNAL_HV"}, \
        {0x600, "ALIGNMENT"}, \
        {0x700, "PROGRAM"}, \
index 853b95d1e13925eb091ce546c3ad8c77346699d3..2011376c7ab5887d37af1aa619b0e9dae5872fb1 100644 (file)
@@ -15,7 +15,6 @@
        {0x400, "INST_STORAGE"}, \
        {0x480, "INST_SEGMENT"}, \
        {0x500, "EXTERNAL"}, \
-       {0x501, "EXTERNAL_LEVEL"}, \
        {0x502, "EXTERNAL_HV"}, \
        {0x600, "ALIGNMENT"}, \
        {0x700, "PROGRAM"}, \