drm/i915: Prevent lock-cycles between GPU waits and GPU resets
author: Chris Wilson <chris@chris-wilson.co.uk>
Wed, 12 Jun 2019 08:52:46 +0000 (09:52 +0100)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Wed, 12 Jun 2019 11:06:11 +0000 (12:06 +0100)
We cannot allow ourselves to wait on the GPU while holding any lock, as we
may need to reset the GPU. Because there is no explicit lock shared between
the two operations, lockdep cannot detect the dependency on its own. So let's
tell lockdep about the wait/reset dependency with an explicit lockmap.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190612085246.16374-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/selftests/mock_gem_device.c

index 60d24110af80eda98d8e818ec230c6e10212209a..6368b37f26d15b4cfb7cc10336d1813c8348d7d7 100644 (file)
@@ -978,10 +978,11 @@ void i915_reset(struct drm_i915_private *i915,
 
        might_sleep();
        GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+       lock_map_acquire(&i915->gt.reset_lockmap);
 
        /* Clear any previous failed attempts at recovery. Time to try again. */
        if (!__i915_gem_unset_wedged(i915))
-               return;
+               goto unlock;
 
        if (reason)
                dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
@@ -1029,6 +1030,8 @@ void i915_reset(struct drm_i915_private *i915,
 
 finish:
        reset_finish(i915);
+unlock:
+       lock_map_release(&i915->gt.reset_lockmap);
        return;
 
 taint:
index 0ea7f78ae227a5200b50468ef5fd121adb2dabcd..2e2cfbfa0ceddb2e3d40a9e78d13dc49ecf2d7d0 100644 (file)
@@ -1919,6 +1919,14 @@ struct drm_i915_private {
                ktime_t last_init_time;
 
                struct i915_vma *scratch;
+
+               /*
+                * We must never wait on the GPU while holding a lock as we
+                * may need to perform a GPU reset. So while we don't need to
+                * serialise wait/reset with an explicit lock, we do want
+                * lockdep to detect potential dependency cycles.
+                */
+               struct lockdep_map reset_lockmap;
        } gt;
 
        struct {
index e980c1ee3dcf790bdbd2c30af3a2efeb6b6e948d..24f0f3db1bfb98cd4eae90a50cdd6c2d9f8d3182 100644 (file)
@@ -1782,6 +1782,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 
 int i915_gem_init_early(struct drm_i915_private *dev_priv)
 {
+       static struct lock_class_key reset_key;
        int err;
 
        intel_gt_pm_init(dev_priv);
@@ -1789,6 +1790,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
        INIT_LIST_HEAD(&dev_priv->gt.active_rings);
        INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
        spin_lock_init(&dev_priv->gt.closed_lock);
+       lockdep_init_map(&dev_priv->gt.reset_lockmap,
+                        "i915.reset", &reset_key, 0);
 
        i915_gem_init__mm(dev_priv);
        i915_gem_init__pm(dev_priv);
index e9b59eea4f1046ae65f1fcfcd75c596e1453af51..1cbc3ef4fc27b36015347bb1f9df246d30749940 100644 (file)
@@ -1444,6 +1444,7 @@ long i915_request_wait(struct i915_request *rq,
                return -ETIME;
 
        trace_i915_request_wait_begin(rq, flags);
+       lock_map_acquire(&rq->i915->gt.reset_lockmap);
 
        /*
         * Optimistic spin before touching IRQs.
@@ -1517,6 +1518,7 @@ long i915_request_wait(struct i915_request *rq,
        dma_fence_remove_callback(&rq->fence, &wait.cb);
 
 out:
+       lock_map_release(&rq->i915->gt.reset_lockmap);
        trace_i915_request_wait_end(rq);
        return timeout;
 }
index b7f3fbb4ae89f43f7f63c5f9cb82fd1c50e0c8db..1e9ffced78c129064aaf96e00ec1a8009cb209f5 100644 (file)
@@ -130,6 +130,7 @@ static struct dev_pm_domain pm_domain = {
 
 struct drm_i915_private *mock_gem_device(void)
 {
+       static struct lock_class_key reset_key;
        struct drm_i915_private *i915;
        struct pci_dev *pdev;
        int err;
@@ -204,6 +205,7 @@ struct drm_i915_private *mock_gem_device(void)
        INIT_LIST_HEAD(&i915->gt.active_rings);
        INIT_LIST_HEAD(&i915->gt.closed_vma);
        spin_lock_init(&i915->gt.closed_lock);
+       lockdep_init_map(&i915->gt.reset_lockmap, "i915.reset", &reset_key, 0);
 
        mutex_lock(&i915->drm.struct_mutex);