drm/i915/gt: Push engine stopping into reset-prepare
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 16 Jul 2019 12:49:28 +0000 (13:49 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 17 Jul 2019 17:47:00 +0000 (18:47 +0100)
Push the engine stop into the backend reset_prepare (where it already was!).
This allows us to avoid dangerously setting the RING registers to 0 for
logical contexts. If we clear the registers on a live context, those
invalid register values are recorded in the logical context state and
replayed (with hilarious results).
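
For reference, the per-engine flow after this change looks roughly like the
sketch below. It is illustrative only: engine_reset_sketch() is a made-up
wrapper, and engine->gt, engine->mask and the engine->reset hooks are assumed
to match the surrounding i915 code of this period.

	static void engine_reset_sketch(struct intel_engine_cs *engine)
	{
		/*
		 * The backend reset_prepare() now stops the command streamer
		 * itself, so nothing pokes RING_HEAD/RING_TAIL/RING_CTL while
		 * a logical context is still live.
		 */
		engine->reset.prepare(engine);

		/* Perform the reset; retries no longer re-stop the engines. */
		__intel_gt_reset(engine->gt, engine->mask);

		/* The backend repairs ring/context state before resubmitting. */
		engine->reset.reset(engine, false);
		engine->reset.finish(engine);
	}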

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190716124931.5870-2-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/intel_ringbuffer.c

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 6564b5da224fef92bd19dd950f6d32361c445238..d076d9148b6dc0c4fa2e5839ca7ac335cd262b9a 100644
@@ -2183,11 +2183,23 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
        __tasklet_disable_sync_once(&execlists->tasklet);
        GEM_BUG_ON(!reset_in_progress(execlists));
 
-       intel_engine_stop_cs(engine);
-
        /* And flush any current direct submission. */
        spin_lock_irqsave(&engine->active.lock, flags);
        spin_unlock_irqrestore(&engine->active.lock, flags);
+
+       /*
+        * We stop engines, otherwise we might get failed reset and a
+        * dead gpu (on elk). Also as modern gpu as kbl can suffer
+        * from system hang if batchbuffer is progressing when
+        * the reset is issued, regardless of READY_TO_RESET ack.
+        * Thus assume it is best to stop engines on all gens
+        * where we have a gpu reset.
+        *
+        * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+        *
+        * FIXME: Wa for more modern gens needs to be validated
+        */
+       intel_engine_stop_cs(engine);
 }
 
 static void reset_csb_pointers(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 7ddedfb16aa2045dba4c32a1d4b92da5c35dc08a..55e2ddcbd2158ce192298fac982d92eb64315ab6 100644
@@ -135,47 +135,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
        }
 }
 
-static void gen3_stop_engine(struct intel_engine_cs *engine)
-{
-       struct intel_uncore *uncore = engine->uncore;
-       const u32 base = engine->mmio_base;
-
-       GEM_TRACE("%s\n", engine->name);
-
-       if (intel_engine_stop_cs(engine))
-               GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
-
-       intel_uncore_write_fw(uncore,
-                             RING_HEAD(base),
-                             intel_uncore_read_fw(uncore, RING_TAIL(base)));
-       intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
-
-       intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
-       intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
-       intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
-
-       /* The ring must be empty before it is disabled */
-       intel_uncore_write_fw(uncore, RING_CTL(base), 0);
-
-       /* Check acts as a post */
-       if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
-               GEM_TRACE("%s: ring head [%x] not parked\n",
-                         engine->name,
-                         intel_uncore_read_fw(uncore, RING_HEAD(base)));
-}
-
-static void stop_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask)
-{
-       struct intel_engine_cs *engine;
-       intel_engine_mask_t tmp;
-
-       if (INTEL_GEN(gt->i915) < 3)
-               return;
-
-       for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
-               gen3_stop_engine(engine);
-}
-
 static bool i915_in_reset(struct pci_dev *pdev)
 {
        u8 gdrst;
@@ -607,23 +566,6 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
         */
        intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
        for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
-               /*
-                * We stop engines, otherwise we might get failed reset and a
-                * dead gpu (on elk). Also as modern gpu as kbl can suffer
-                * from system hang if batchbuffer is progressing when
-                * the reset is issued, regardless of READY_TO_RESET ack.
-                * Thus assume it is best to stop engines on all gens
-                * where we have a gpu reset.
-                *
-                * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
-                *
-                * WaMediaResetMainRingCleanup:ctg,elk (presumably)
-                *
-                * FIXME: Wa for more modern gens needs to be validated
-                */
-               if (retry)
-                       stop_engines(gt, engine_mask);
-
                GEM_TRACE("engine_mask=%x\n", engine_mask);
                preempt_disable();
                ret = reset(gt, engine_mask, retry);
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index f1e571fa2e6d370d357b28af00a6c690a247ecef..213df144be15266f9c6051f3d44da61bf6cb4209 100644
@@ -739,7 +739,45 @@ out:
 
 static void reset_prepare(struct intel_engine_cs *engine)
 {
-       intel_engine_stop_cs(engine);
+       struct intel_uncore *uncore = engine->uncore;
+       const u32 base = engine->mmio_base;
+
+       /*
+        * We stop engines, otherwise we might get failed reset and a
+        * dead gpu (on elk). Also as modern gpu as kbl can suffer
+        * from system hang if batchbuffer is progressing when
+        * the reset is issued, regardless of READY_TO_RESET ack.
+        * Thus assume it is best to stop engines on all gens
+        * where we have a gpu reset.
+        *
+        * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+        *
+        * WaMediaResetMainRingCleanup:ctg,elk (presumably)
+        *
+        * FIXME: Wa for more modern gens needs to be validated
+        */
+       GEM_TRACE("%s\n", engine->name);
+
+       if (intel_engine_stop_cs(engine))
+               GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
+
+       intel_uncore_write_fw(uncore,
+                             RING_HEAD(base),
+                             intel_uncore_read_fw(uncore, RING_TAIL(base)));
+       intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
+
+       intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
+       intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
+       intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
+
+       /* The ring must be empty before it is disabled */
+       intel_uncore_write_fw(uncore, RING_CTL(base), 0);
+
+       /* Check acts as a post */
+       if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
+               GEM_TRACE("%s: ring head [%x] not parked\n",
+                         engine->name,
+                         intel_uncore_read_fw(uncore, RING_HEAD(base)));
 }
 
 static void reset_ring(struct intel_engine_cs *engine, bool stalled)