drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 7 Apr 2020 13:08:11 +0000 (14:08 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 7 Apr 2020 13:43:58 +0000 (14:43 +0100)
If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
user batch or in our own preamble, the engine raises a
GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
respond to a semaphore wait by yielding the timeslice, if we have
another context to yield to!

The only real complication is that the interrupt is only generated for
the start of the semaphore wait, and is asynchronous to our
process_csb() -- that is, we may not have registered the timeslice before
we see the interrupt. To ensure we don't miss a potential semaphore
blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
the interrupt and apply it to the next timeslice regardless of whether it
was active at the time.

v2: We use semaphores in preempt-to-busy, within the timeslicing
implementation itself! Ergo, when we do insert a preemption due to an
expired timeslice, the new context may start with the missed semaphore
flagged by the retired context and be yielded, ad infinitum. To avoid
this, read the context id at the time of the semaphore interrupt and
only yield if that context is still active.

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200407130811.17321-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_gt_irq.c
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/selftest_lrc.c
drivers/gpu/drm/i915/i915_reg.h

index 977e23fac5cea9ab673e30dd79b1bbdb1dcdbd05..b1f8527f02c8234a78cd06e465000f46194e313b 100644 (file)
@@ -1325,6 +1325,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 
        if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
                drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+       if (HAS_EXECLISTS(dev_priv)) {
+               drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+                          ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+               drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+                          ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+       }
        drm_printf(m, "\tRING_START: 0x%08x\n",
                   ENGINE_READ(engine, RING_START));
        drm_printf(m, "\tRING_HEAD:  0x%08x\n",
index de8e6edcf999f3b3cd745530405d95365da2147f..01d4bd781a2f5b65f3cded15226e09bfb762b145 100644 (file)
@@ -156,6 +156,15 @@ struct intel_engine_execlists {
         */
        struct i915_priolist default_priolist;
 
+       /**
+        * @yield: CCID at the time of the last semaphore-wait interrupt.
+        *
+        * Instead of leaving a semaphore busy-spinning on an engine, we would
+        * like to switch to another ready context, i.e. yielding the semaphore
+        * timeslice.
+        */
+       u32 yield;
+
        /**
         * @error_interrupt: CS Master EIR
         *
index f0e7fd95165a7d15058237e578cbb253c76c27d9..0cc7dd54f4f963ae6dc29698c3839a82e1667a0d 100644 (file)
@@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
                }
        }
 
+       if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+               WRITE_ONCE(engine->execlists.yield,
+                          ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+               ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+                            engine->execlists.yield);
+               if (del_timer(&engine->execlists.timer))
+                       tasklet = true;
+       }
+
        if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
                tasklet = true;
 
@@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
        const u32 irqs =
                GT_CS_MASTER_ERROR_INTERRUPT |
                GT_RENDER_USER_INTERRUPT |
-               GT_CONTEXT_SWITCH_INTERRUPT;
+               GT_CONTEXT_SWITCH_INTERRUPT |
+               GT_WAIT_SEMAPHORE_INTERRUPT;
        struct intel_uncore *uncore = gt->uncore;
        const u32 dmask = irqs << 16 | irqs;
        const u32 smask = irqs << 16;
@@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
        const u32 irqs =
                GT_CS_MASTER_ERROR_INTERRUPT |
                GT_RENDER_USER_INTERRUPT |
-               GT_CONTEXT_SWITCH_INTERRUPT;
+               GT_CONTEXT_SWITCH_INTERRUPT |
+               GT_WAIT_SEMAPHORE_INTERRUPT;
        const u32 gt_interrupts[] = {
                irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
                irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
index 19ffc77636833e215697252003b5cca54ec00d51..7adc73a5b7094b91c4787cf7b6cb14da77968142 100644 (file)
@@ -1768,7 +1768,8 @@ static void defer_active(struct intel_engine_cs *engine)
 }
 
 static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+              const struct i915_request *rq)
 {
        int hint;
 
@@ -1782,6 +1783,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
        return hint >= effective_prio(rq);
 }
 
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+               const struct i915_request *rq)
+{
+       /*
+        * Once bitten, forever smitten!
+        *
+        * If the active context ever busy-waited on a semaphore,
+        * it will be treated as a hog until the end of its timeslice (i.e.
+        * until it is scheduled out and replaced by a new submission,
+        * possibly even its own lite-restore). The HW only sends an interrupt
+        * on the first miss, and we do not know if that semaphore has been
+        * signaled, or even if it is now stuck on another semaphore. Play
+        * safe, yield if it might be stuck -- it will be given a fresh
+        * timeslice in the near future.
+        */
+       return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+                 const struct i915_request *rq)
+{
+       return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
 static int
 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
@@ -1797,8 +1824,7 @@ timeslice(const struct intel_engine_cs *engine)
        return READ_ONCE(engine->props.timeslice_duration_ms);
 }
 
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
 {
        const struct intel_engine_execlists *execlists = &engine->execlists;
        const struct i915_request *rq = *execlists->active;
@@ -1989,18 +2015,19 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
                        last = NULL;
                } else if (need_timeslice(engine, last) &&
-                          timer_expired(&engine->execlists.timer)) {
+                          timeslice_expired(execlists, last)) {
                        if (i915_request_completed(last)) {
                                tasklet_hi_schedule(&execlists->tasklet);
                                return;
                        }
 
                        ENGINE_TRACE(engine,
-                                    "expired last=%llx:%lld, prio=%d, hint=%d\n",
+                                    "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
                                     last->fence.context,
                                     last->fence.seqno,
                                     last->sched.attr.priority,
-                                    execlists->queue_priority_hint);
+                                    execlists->queue_priority_hint,
+                                    yesno(timeslice_yield(execlists, last)));
 
                        ring_set_paused(engine, 1);
                        defer_active(engine);
@@ -2261,6 +2288,7 @@ done:
                }
                clear_ports(port + 1, last_port - port);
 
+               WRITE_ONCE(execlists->yield, -1);
                execlists_submit_ports(engine);
                set_preempt_timeout(engine, *active);
        } else {
@@ -4563,6 +4591,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
        engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
        engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
        engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+       engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
index a9ccfae54c24789b9cd4176c7d04b701ed219495..43362b8a585594640abb0696e6071bcee9f483a8 100644 (file)
@@ -945,7 +945,7 @@ create_rewinder(struct intel_context *ce,
                        goto err;
        }
 
-       cs = intel_ring_begin(rq, 10);
+       cs = intel_ring_begin(rq, 14);
        if (IS_ERR(cs)) {
                err = PTR_ERR(cs);
                goto err;
@@ -957,8 +957,8 @@ create_rewinder(struct intel_context *ce,
        *cs++ = MI_SEMAPHORE_WAIT |
                MI_SEMAPHORE_GLOBAL_GTT |
                MI_SEMAPHORE_POLL |
-               MI_SEMAPHORE_SAD_NEQ_SDD;
-       *cs++ = 0;
+               MI_SEMAPHORE_SAD_GTE_SDD;
+       *cs++ = idx;
        *cs++ = offset;
        *cs++ = 0;
 
@@ -967,6 +967,11 @@ create_rewinder(struct intel_context *ce,
        *cs++ = offset + idx * sizeof(u32);
        *cs++ = 0;
 
+       *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+       *cs++ = offset;
+       *cs++ = 0;
+       *cs++ = idx + 1;
+
        intel_ring_advance(rq, cs);
 
        rq->sched.attr.priority = I915_PRIORITY_MASK;
@@ -1000,7 +1005,7 @@ static int live_timeslice_rewind(void *arg)
 
        for_each_engine(engine, gt, id) {
                enum { A1, A2, B1 };
-               enum { X = 1, Y, Z };
+               enum { X = 1, Z, Y };
                struct i915_request *rq[3] = {};
                struct intel_context *ce;
                unsigned long heartbeat;
@@ -1033,13 +1038,13 @@ static int live_timeslice_rewind(void *arg)
                        goto err;
                }
 
-               rq[0] = create_rewinder(ce, NULL, slot, 1);
+               rq[0] = create_rewinder(ce, NULL, slot, X);
                if (IS_ERR(rq[0])) {
                        intel_context_put(ce);
                        goto err;
                }
 
-               rq[1] = create_rewinder(ce, NULL, slot, 2);
+               rq[1] = create_rewinder(ce, NULL, slot, Y);
                intel_context_put(ce);
                if (IS_ERR(rq[1]))
                        goto err;
@@ -1057,7 +1062,7 @@ static int live_timeslice_rewind(void *arg)
                        goto err;
                }
 
-               rq[2] = create_rewinder(ce, rq[0], slot, 3);
+               rq[2] = create_rewinder(ce, rq[0], slot, Z);
                intel_context_put(ce);
                if (IS_ERR(rq[2]))
                        goto err;
@@ -1071,15 +1076,12 @@ static int live_timeslice_rewind(void *arg)
                GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
 
                /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
-               GEM_BUG_ON(!i915_request_is_active(rq[A1]));
-               GEM_BUG_ON(!i915_request_is_active(rq[A2]));
-               GEM_BUG_ON(!i915_request_is_active(rq[B1]));
-
-               /* Wait for the timeslice to kick in */
-               del_timer(&engine->execlists.timer);
-               tasklet_hi_schedule(&engine->execlists.tasklet);
-               intel_engine_flush_submission(engine);
-
+               if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
+                       /* Wait for the timeslice to kick in */
+                       del_timer(&engine->execlists.timer);
+                       tasklet_hi_schedule(&engine->execlists.tasklet);
+                       intel_engine_flush_submission(engine);
+               }
                /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
                GEM_BUG_ON(!i915_request_is_active(rq[A1]));
                GEM_BUG_ON(!i915_request_is_active(rq[B1]));
index 8cebb7a86b8c576e052bd68129f7c48d524e0ae6..1a7bd6db164b3d872c6a1badb64b016c20790f54 100644 (file)
@@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GT_BSD_CS_ERROR_INTERRUPT              (1 << 15)
 #define GT_BSD_USER_INTERRUPT                  (1 << 12)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT            REG_BIT(11) /* bdw+ */
 #define GT_CONTEXT_SWITCH_INTERRUPT            (1 <<  8)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT    (1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT     (1 <<  4)