drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore

author Chris Wilson <chris@chris-wilson.co.uk>

Tue, 7 Apr 2020 13:08:11 +0000 (14:08 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Tue, 7 Apr 2020 13:43:58 +0000 (14:43 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 7 Apr 2020 13:08:11 +0000 (14:08 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 7 Apr 2020 13:43:58 +0000 (14:43 +0100)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c

index 977e23fac5cea9ab673e30dd79b1bbdb1dcdbd05..b1f8527f02c8234a78cd06e465000f46194e313b 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1325,6 +1325,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
  
         if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
                 drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+       if (HAS_EXECLISTS(dev_priv)) {
+               drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+                          ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+               drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+                          ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+       }
         drm_printf(m, "\tRING_START: 0x%08x\n",
                    ENGINE_READ(engine, RING_START));
         drm_printf(m, "\tRING_HEAD:  0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h

index de8e6edcf999f3b3cd745530405d95365da2147f..01d4bd781a2f5b65f3cded15226e09bfb762b145 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -156,6 +156,15 @@ struct intel_engine_execlists {
          */
         struct i915_priolist default_priolist;
  
+       /**
+        * @yield: CCID at the time of the last semaphore-wait interrupt.
+        *
+        * Instead of leaving a semaphore busy-spinning on an engine, we would
+        * like to switch to another ready context, i.e. yielding the semaphore
+        * timeslice.
+        */
+       u32 yield;
+
         /**
          * @error_interrupt: CS Master EIR
          *
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c

index f0e7fd95165a7d15058237e578cbb253c76c27d9..0cc7dd54f4f963ae6dc29698c3839a82e1667a0d 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
                 }
         }
  
+       if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+               WRITE_ONCE(engine->execlists.yield,
+                          ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+               ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+                            engine->execlists.yield);
+               if (del_timer(&engine->execlists.timer))
+                       tasklet = true;
+       }
+
         if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
                 tasklet = true;
  
@@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
         const u32 irqs =
                 GT_CS_MASTER_ERROR_INTERRUPT |
                 GT_RENDER_USER_INTERRUPT |
-               GT_CONTEXT_SWITCH_INTERRUPT;
+               GT_CONTEXT_SWITCH_INTERRUPT |
+               GT_WAIT_SEMAPHORE_INTERRUPT;
         struct intel_uncore *uncore = gt->uncore;
         const u32 dmask = irqs << 16 | irqs;
         const u32 smask = irqs << 16;
@@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
         const u32 irqs =
                 GT_CS_MASTER_ERROR_INTERRUPT |
                 GT_RENDER_USER_INTERRUPT |
-               GT_CONTEXT_SWITCH_INTERRUPT;
+               GT_CONTEXT_SWITCH_INTERRUPT |
+               GT_WAIT_SEMAPHORE_INTERRUPT;
         const u32 gt_interrupts[] = {
                 irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
                 irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c

index 19ffc77636833e215697252003b5cca54ec00d51..7adc73a5b7094b91c4787cf7b6cb14da77968142 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1768,7 +1768,8 @@ static void defer_active(struct intel_engine_cs *engine)
  }
  
  static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+              const struct i915_request *rq)
  {
         int hint;
  
@@ -1782,6 +1783,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
         return hint >= effective_prio(rq);
  }
  
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+               const struct i915_request *rq)
+{
+       /*
+        * Once bitten, forever smitten!
+        *
+        * If the active context ever busy-waited on a semaphore,
+        * it will be treated as a hog until the end of its timeslice (i.e.
+        * until it is scheduled out and replaced by a new submission,
+        * possibly even its own lite-restore). The HW only sends an interrupt
+        * on the first miss, and we do not know if that semaphore has been
+        * signaled, or even if it is now stuck on another semaphore. Play
+        * safe, yield if it might be stuck -- it will be given a fresh
+        * timeslice in the near future.
+        */
+       return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+                 const struct i915_request *rq)
+{
+       return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
  static int
  switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
  {
@@ -1797,8 +1824,7 @@ timeslice(const struct intel_engine_cs *engine)
         return READ_ONCE(engine->props.timeslice_duration_ms);
  }
  
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
  {
         const struct intel_engine_execlists *execlists = &engine->execlists;
         const struct i915_request *rq = *execlists->active;
@@ -1989,18 +2015,19 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
  
                         last = NULL;
                 } else if (need_timeslice(engine, last) &&
-                          timer_expired(&engine->execlists.timer)) {
+                          timeslice_expired(execlists, last)) {
                         if (i915_request_completed(last)) {
                                 tasklet_hi_schedule(&execlists->tasklet);
                                 return;
                         }
  
                         ENGINE_TRACE(engine,
-                                    "expired last=%llx:%lld, prio=%d, hint=%d\n",
+                                    "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
                                      last->fence.context,
                                      last->fence.seqno,
                                      last->sched.attr.priority,
-                                    execlists->queue_priority_hint);
+                                    execlists->queue_priority_hint,
+                                    yesno(timeslice_yield(execlists, last)));
  
                         ring_set_paused(engine, 1);
                         defer_active(engine);
@@ -2261,6 +2288,7 @@ done:
                 }
                 clear_ports(port + 1, last_port - port);
  
+               WRITE_ONCE(execlists->yield, -1);
                 execlists_submit_ports(engine);
                 set_preempt_timeout(engine, *active);
         } else {
@@ -4563,6 +4591,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
         engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
         engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
         engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+       engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
  }
  
  static void rcs_submission_override(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c

index a9ccfae54c24789b9cd4176c7d04b701ed219495..43362b8a585594640abb0696e6071bcee9f483a8 100644 (file)
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -945,7 +945,7 @@ create_rewinder(struct intel_context *ce,
                         goto err;
         }
  
-       cs = intel_ring_begin(rq, 10);
+       cs = intel_ring_begin(rq, 14);
         if (IS_ERR(cs)) {
                 err = PTR_ERR(cs);
                 goto err;
@@ -957,8 +957,8 @@ create_rewinder(struct intel_context *ce,
         *cs++ = MI_SEMAPHORE_WAIT |
                 MI_SEMAPHORE_GLOBAL_GTT |
                 MI_SEMAPHORE_POLL |
-               MI_SEMAPHORE_SAD_NEQ_SDD;
-       *cs++ = 0;
+               MI_SEMAPHORE_SAD_GTE_SDD;
+       *cs++ = idx;
         *cs++ = offset;
         *cs++ = 0;
  
@@ -967,6 +967,11 @@ create_rewinder(struct intel_context *ce,
         *cs++ = offset + idx * sizeof(u32);
         *cs++ = 0;
  
+       *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+       *cs++ = offset;
+       *cs++ = 0;
+       *cs++ = idx + 1;
+
         intel_ring_advance(rq, cs);
  
         rq->sched.attr.priority = I915_PRIORITY_MASK;
@@ -1000,7 +1005,7 @@ static int live_timeslice_rewind(void *arg)
  
         for_each_engine(engine, gt, id) {
                 enum { A1, A2, B1 };
-               enum { X = 1, Y, Z };
+               enum { X = 1, Z, Y };
                 struct i915_request *rq[3] = {};
                 struct intel_context *ce;
                 unsigned long heartbeat;
@@ -1033,13 +1038,13 @@ static int live_timeslice_rewind(void *arg)
                         goto err;
                 }
  
-               rq[0] = create_rewinder(ce, NULL, slot, 1);
+               rq[0] = create_rewinder(ce, NULL, slot, X);
                 if (IS_ERR(rq[0])) {
                         intel_context_put(ce);
                         goto err;
                 }
  
-               rq[1] = create_rewinder(ce, NULL, slot, 2);
+               rq[1] = create_rewinder(ce, NULL, slot, Y);
                 intel_context_put(ce);
                 if (IS_ERR(rq[1]))
                         goto err;
@@ -1057,7 +1062,7 @@ static int live_timeslice_rewind(void *arg)
                         goto err;
                 }
  
-               rq[2] = create_rewinder(ce, rq[0], slot, 3);
+               rq[2] = create_rewinder(ce, rq[0], slot, Z);
                 intel_context_put(ce);
                 if (IS_ERR(rq[2]))
                         goto err;
@@ -1071,15 +1076,12 @@ static int live_timeslice_rewind(void *arg)
                 GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
  
                 /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
-               GEM_BUG_ON(!i915_request_is_active(rq[A1]));
-               GEM_BUG_ON(!i915_request_is_active(rq[A2]));
-               GEM_BUG_ON(!i915_request_is_active(rq[B1]));
-
-               /* Wait for the timeslice to kick in */
-               del_timer(&engine->execlists.timer);
-               tasklet_hi_schedule(&engine->execlists.tasklet);
-               intel_engine_flush_submission(engine);
-
+               if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
+                       /* Wait for the timeslice to kick in */
+                       del_timer(&engine->execlists.timer);
+                       tasklet_hi_schedule(&engine->execlists.tasklet);
+                       intel_engine_flush_submission(engine);
+               }
                 /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
                 GEM_BUG_ON(!i915_request_is_active(rq[A1]));
                 GEM_BUG_ON(!i915_request_is_active(rq[B1]));
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h

index 8cebb7a86b8c576e052bd68129f7c48d524e0ae6..1a7bd6db164b3d872c6a1badb64b016c20790f54 100644 (file)
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
  #define GT_BSD_CS_ERROR_INTERRUPT              (1 << 15)
  #define GT_BSD_USER_INTERRUPT                  (1 << 12)
  #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT            REG_BIT(11) /* bdw+ */
  #define GT_CONTEXT_SWITCH_INTERRUPT            (1 <<  8)
  #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT    (1 <<  5) /* !snb */
  #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT     (1 <<  4)
author	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 7 Apr 2020 13:08:11 +0000 (14:08 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 7 Apr 2020 13:43:58 +0000 (14:43 +0100)
drivers/gpu/drm/i915/gt/intel_engine_cs.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_engine_types.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_gt_irq.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/selftest_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_reg.h		patch \| blob \| history