From c30d5dc653cbc78f9b634b7b72e25057a68c527c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Jul 2019 13:49:28 +0100 Subject: [PATCH] drm/i915/gt: Push engine stopping into reset-prepare Push the engine stop into the back reset_prepare (where it already was!) This allows us to avoid dangerously setting the RING registers to 0 for logical contexts. If we clear the register on a live context, those invalid register values are recorded in the logical context state and replayed (with hilarious results). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190716124931.5870-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 16 +++++- drivers/gpu/drm/i915/gt/intel_reset.c | 58 ---------------------- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 40 ++++++++++++++- 3 files changed, 53 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 6564b5da224f..d076d9148b6d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2183,11 +2183,23 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) __tasklet_disable_sync_once(&execlists->tasklet); GEM_BUG_ON(!reset_in_progress(execlists)); - intel_engine_stop_cs(engine); - /* And flush any current direct submission. */ spin_lock_irqsave(&engine->active.lock, flags); spin_unlock_irqrestore(&engine->active.lock, flags); + + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * FIXME: Wa for more modern gens needs to be validated + */ + intel_engine_stop_cs(engine); } static void reset_csb_pointers(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 7ddedfb16aa2..55e2ddcbd215 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -135,47 +135,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) } } -static void gen3_stop_engine(struct intel_engine_cs *engine) -{ - struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; - - GEM_TRACE("%s\n", engine->name); - - if (intel_engine_stop_cs(engine)) - GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); - - intel_uncore_write_fw(uncore, - RING_HEAD(base), - intel_uncore_read_fw(uncore, RING_TAIL(base))); - intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ - - intel_uncore_write_fw(uncore, RING_HEAD(base), 0); - intel_uncore_write_fw(uncore, RING_TAIL(base), 0); - intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); - - /* The ring must be empty before it is disabled */ - intel_uncore_write_fw(uncore, RING_CTL(base), 0); - - /* Check acts as a post */ - if (intel_uncore_read_fw(uncore, RING_HEAD(base))) - GEM_TRACE("%s: ring head [%x] not parked\n", - engine->name, - intel_uncore_read_fw(uncore, RING_HEAD(base))); -} - -static void stop_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (INTEL_GEN(gt->i915) < 3) - return; - - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) - gen3_stop_engine(engine); -} - static bool i915_in_reset(struct pci_dev *pdev) { u8 gdrst; @@ -607,23 +566,6 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { - /* - * We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). Also as modern gpu as kbl can suffer - * from system hang if batchbuffer is progressing when - * the reset is issued, regardless of READY_TO_RESET ack. - * Thus assume it is best to stop engines on all gens - * where we have a gpu reset. - * - * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) - * - * WaMediaResetMainRingCleanup:ctg,elk (presumably) - * - * FIXME: Wa for more modern gens needs to be validated - */ - if (retry) - stop_engines(gt, engine_mask); - GEM_TRACE("engine_mask=%x\n", engine_mask); preempt_disable(); ret = reset(gt, engine_mask, retry); diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index f1e571fa2e6d..213df144be15 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -739,7 +739,45 @@ out: static void reset_prepare(struct intel_engine_cs *engine) { - intel_engine_stop_cs(engine); + struct intel_uncore *uncore = engine->uncore; + const u32 base = engine->mmio_base; + + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * WaMediaResetMainRingCleanup:ctg,elk (presumably) + * + * FIXME: Wa for more modern gens needs to be validated + */ + GEM_TRACE("%s\n", engine->name); + + if (intel_engine_stop_cs(engine)) + GEM_TRACE("%s: timed out on STOP_RING\n", engine->name); + + intel_uncore_write_fw(uncore, + RING_HEAD(base), + intel_uncore_read_fw(uncore, RING_TAIL(base))); + intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */ + + intel_uncore_write_fw(uncore, RING_HEAD(base), 0); + intel_uncore_write_fw(uncore, RING_TAIL(base), 0); + intel_uncore_posting_read_fw(uncore, RING_TAIL(base)); + + /* The ring must be empty before it is disabled */ + intel_uncore_write_fw(uncore, RING_CTL(base), 0); + + /* Check acts as a post */ + if (intel_uncore_read_fw(uncore, RING_HEAD(base))) + GEM_TRACE("%s: ring head [%x] not parked\n", + engine->name, + intel_uncore_read_fw(uncore, RING_HEAD(base))); } static void reset_ring(struct intel_engine_cs *engine, bool stalled) -- 2.30.2