From 41a1bde3671582bd0fc9e57f1f1c355a24db6bc4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Mar 2019 16:28:35 +0000 Subject: [PATCH] drm/i915: Always kick the execlists tasklet after reset With direct submission being disabled while the reset in progress, we have a small window where we may forgo the submission of a new request and not notice its addition during execlists_reset_finish. To close this window, always schedule the submission tasklet on coming out of reset to catch any residual work. <6> [333.144082] i915: Running intel_hangcheck_live_selftests/igt_reset_engines <3> [333.296927] i915_reset_engine(rcs0:idle): failed to idle after reset <6> [333.296932] i915 0000:00:02.0: [drm] rcs0 <6> [333.296934] i915 0000:00:02.0: [drm] Hangcheck 0:a9ddf7a5 [4157 ms] <6> [333.296936] i915 0000:00:02.0: [drm] Reset count: 36048 (global 754) <6> [333.296938] i915 0000:00:02.0: [drm] Requests: <6> [333.296997] i915 0000:00:02.0: [drm] RING_START: 0x00000000 <6> [333.296999] i915 0000:00:02.0: [drm] RING_HEAD: 0x00000000 <6> [333.297001] i915 0000:00:02.0: [drm] RING_TAIL: 0x00000000 <6> [333.297003] i915 0000:00:02.0: [drm] RING_CTL: 0x00000000 <6> [333.297005] i915 0000:00:02.0: [drm] RING_MODE: 0x00000200 [idle] <6> [333.297007] i915 0000:00:02.0: [drm] RING_IMR: fffffeff <6> [333.297010] i915 0000:00:02.0: [drm] ACTHD: 0x00000000_00000000 <6> [333.297012] i915 0000:00:02.0: [drm] BBADDR: 0x00000000_00000000 <6> [333.297015] i915 0000:00:02.0: [drm] DMA_FADDR: 0x00000000_00000000 <6> [333.297017] i915 0000:00:02.0: [drm] IPEIR: 0x00000000 <6> [333.297019] i915 0000:00:02.0: [drm] IPEHR: 0x00000000 <6> [333.297021] i915 0000:00:02.0: [drm] Execlist status: 0x00000001 00000000 <6> [333.297023] i915 0000:00:02.0: [drm] Execlist CSB read 5, write 5 [mmio:7], tasklet queued? no (enabled) <6> [333.297025] i915 0000:00:02.0: [drm] ELSP[0] idle <6> [333.297027] i915 0000:00:02.0: [drm] ELSP[1] idle <6> [333.297028] i915 0000:00:02.0: [drm] HW active? 0x0 <6> [333.297044] i915 0000:00:02.0: [drm] Queue priority hint: -8186 <6> [333.297067] i915 0000:00:02.0: [drm] Q 2afac:5f2+ prio=-8186 @ 50ms: (null) <6> [333.297068] i915 0000:00:02.0: [drm] HWSP: <6> [333.297071] i915 0000:00:02.0: [drm] [0000] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 <6> [333.297073] i915 0000:00:02.0: [drm] * <6> [333.297075] i915 0000:00:02.0: [drm] [0040] 00000001 00000000 00000018 00000002 00000001 00000000 00000018 00000000 <6> [333.297077] i915 0000:00:02.0: [drm] [0060] 00000001 00000000 00008002 00000002 00000000 00000000 00000000 00000005 <6> [333.297079] i915 0000:00:02.0: [drm] [0080] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 <6> [333.297081] i915 0000:00:02.0: [drm] * <6> [333.297083] i915 0000:00:02.0: [drm] [00c0] 00000000 00000000 00000000 00000000 a9ddf7a5 00000000 00000000 00000000 <6> [333.297085] i915 0000:00:02.0: [drm] [00e0] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 <6> [333.297087] i915 0000:00:02.0: [drm] * <6> [333.297089] i915 0000:00:02.0: [drm] Idle? no <6> [333.297090] i915_reset_engine(rcs0:idle): 3000 resets <3> [333.297092] i915/intel_hangcheck_live_selftests: igt_reset_engines failed with error -5 <3> [333.455460] i915 0000:00:02.0: Failed to idle engines, declaring wedged! ... <0> [333.491294] i915_sel-4916 1.... 333262143us : i915_reset_engine: rcs0 flags=4 <0> [333.491328] i915_sel-4916 1.... 333262143us : execlists_reset_prepare: rcs0: depth<-0 <0> [333.491362] i915_sel-4916 1.... 333262143us : intel_engine_stop_cs: rcs0 <0> [333.491396] i915_sel-4916 1d..1 333262144us : process_csb: rcs0 cs-irq head=5, tail=5 <0> [333.491424] i915_sel-4916 1.... 333262145us : intel_gpu_reset: engine_mask=1 <0> [333.491454] kworker/-214 5.... 333262184us : i915_gem_switch_to_kernel_context: awake?=yes <0> [333.491487] kworker/-214 5.... 333262192us : i915_request_add: rcs0 fence 2afac:1522 <0> [333.491520] kworker/-214 5.... 333262193us : i915_request_add: marking (null) as active <0> [333.491553] i915_sel-4916 1.... 333262199us : intel_engine_cancel_stop_cs: rcs0 <0> [333.491587] i915_sel-4916 1.... 333262199us : execlists_reset_finish: rcs0: depth->0 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190313162835.30228-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.h | 7 ++++++- drivers/gpu/drm/i915/intel_lrc.c | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 74a2ddc1b52f..5c073fe73664 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -82,7 +82,7 @@ void i915_gem_unpark(struct drm_i915_private *i915); static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { - if (atomic_inc_return(&t->count) == 1) + if (!atomic_fetch_inc(&t->count)) tasklet_unlock_wait(t); } @@ -91,4 +91,9 @@ static inline bool __tasklet_is_enabled(const struct tasklet_struct *t) return !atomic_read(&t->count); } +static inline bool __tasklet_enable(struct tasklet_struct *t) +{ + return atomic_dec_and_test(&t->count); +} + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 11b81fd15fab..e54e0064b2d6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2024,13 +2024,14 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) * After a GPU reset, we may have requests to replay. Do so now while * we still have the forcewake to be sure that the GPU is not allowed * to sleep before we restart and reload a context. - * */ GEM_BUG_ON(!reset_in_progress(execlists)); if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) execlists->tasklet.func(execlists->tasklet.data); - tasklet_enable(&execlists->tasklet); + if (__tasklet_enable(&execlists->tasklet)) + /* And kick in case we missed a new request submission. */ + tasklet_hi_schedule(&execlists->tasklet); GEM_TRACE("%s: depth->%d\n", engine->name, atomic_read(&execlists->tasklet.count)); } -- 2.30.2