drm/i915/selftests: Include the trace as a debug aid
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 22 Mar 2018 07:49:08 +0000 (07:49 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 22 Mar 2018 20:34:39 +0000 (20:34 +0000)
If we fail to reset the GPU in a timely fashion, dump the GEM trace so
that we can see what operations were in flight when the GPU got stuck.

v2: There's more than one timeout that deserves tracing!
v3: Silence checkpatch by not even using a product at all!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Jeff McGee <jeff.mcgee@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180322074908.10838-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/selftests/intel_hangcheck.c

index 4372826998aab563a626714c04805ac4bbd11915..9b235dae8dd9fa895582dd8dc728e6d47cbe1a9e 100644 (file)
@@ -260,8 +260,11 @@ static void wedge_me(struct work_struct *work)
 {
        struct wedge_me *w = container_of(work, typeof(*w), work.work);
 
-       pr_err("%pS timed out, cancelling all further testing.\n",
-              w->symbol);
+       pr_err("%pS timed out, cancelling all further testing.\n", w->symbol);
+
+       GEM_TRACE("%pS timed out.\n", w->symbol);
+       GEM_TRACE_DUMP();
+
        i915_gem_set_wedged(w->i915);
 }
 
@@ -621,9 +624,19 @@ static int active_engine(void *data)
                mutex_unlock(&engine->i915->drm.struct_mutex);
 
                if (old) {
-                       i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT);
+                       if (i915_request_wait(old, 0, HZ) < 0) {
+                               GEM_TRACE("%s timed out.\n", engine->name);
+                               GEM_TRACE_DUMP();
+
+                               i915_gem_set_wedged(engine->i915);
+                               i915_request_put(old);
+                               err = -EIO;
+                               break;
+                       }
                        i915_request_put(old);
                }
+
+               cond_resched();
        }
 
        for (count = 0; count < ARRAY_SIZE(rq); count++)
@@ -1126,6 +1139,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 
        err = i915_subtests(tests, i915);
 
+       mutex_lock(&i915->drm.struct_mutex);
+       flush_test(i915, I915_WAIT_LOCKED);
+       mutex_unlock(&i915->drm.struct_mutex);
+
        i915_modparams.enable_hangcheck = saved_hangcheck;
        intel_runtime_pm_put(i915);