drm/i915: Don't claim an unstarted request was guilty
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 8 Feb 2019 15:37:08 +0000
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 8 Feb 2019 16:47:40 +0000
If we haven't even begun executing the payload of the stalled request,
then we should not claim that its userspace context was guilty of
submitting a hanging batch.
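
For reference, "started" means the request's initial breadcrumb has
executed: the payload is preceded by a breadcrumb that writes
rq->fence.seqno - 1 (see gen8_emit_init_breadcrumb below). A rough
sketch of the check, assuming the contemporaneous hwsp_seqno() and
i915_seqno_passed() helpers (the exact definition lives in
i915_request.h and may differ):

	static inline bool i915_request_started(const struct i915_request *rq)
	{
		/* Started, but may since have been preempted! */
		return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
	}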

v2: Check for context corruption before trying to restart.
v3: Preserve semaphores on skipping requests (need to keep the timelines
intact).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190208153708.20023-7-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/selftests/igt_spinner.c
drivers/gpu/drm/i915/selftests/intel_hangcheck.c

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5e98fd79bd9df4a04950bdae683f8b67e2c4e7dd..1b567a3f006a14fc418001a71735e5baac52f4dc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1387,6 +1387,10 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
        *cs++ = rq->fence.seqno - 1;
 
        intel_ring_advance(rq, cs);
+
+       /* Record the updated position of the request's payload */
+       rq->infix = intel_ring_offset(rq, cs);
+
        return 0;
 }
 
@@ -1878,6 +1882,23 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
        spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
+static bool lrc_regs_ok(const struct i915_request *rq)
+{
+       const struct intel_ring *ring = rq->ring;
+       const u32 *regs = rq->hw_context->lrc_reg_state;
+
+       /* Quick spot check for the common signs of context corruption */
+
+       if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
+           (RING_CTL_SIZE(ring->size) | RING_VALID))
+               return false;
+
+       if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
+               return false;
+
+       return true;
+}
+
 static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1912,6 +1933,21 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
        if (!rq)
                goto out_unlock;
 
+       /*
+        * If this request hasn't started yet, e.g. it is waiting on a
+        * semaphore, we need to avoid skipping the request or else we
+        * break the signaling chain. However, if the context is corrupt
+        * the request will not restart and we will be stuck with a wedged
+        * device. It is quite often the case that a reset issued while
+        * the GPU is loading the context image leaves that image
+        * corrupt.
+        *
+        * Otherwise, if we have not started yet, the request should replay
+        * perfectly and we do not need to flag the result as being erroneous.
+        */
+       if (!i915_request_started(rq) && lrc_regs_ok(rq))
+               goto out_unlock;
+
        /*
         * If the request was innocent, we leave the request in the ELSP
         * and will try to replay it on restarting. The context image may
@@ -1924,7 +1960,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
         * image back to the expected values to skip over the guilty request.
         */
        i915_reset_request(rq, stalled);
-       if (!stalled)
+       if (!stalled && lrc_regs_ok(rq))
                goto out_unlock;
 
        /*
@@ -1942,8 +1978,8 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
                       engine->context_size - PAGE_SIZE);
        }
 
-       /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-       rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
+       /* Rerun the request; its payload has been neutered (if guilty). */
+       rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
        intel_ring_update_space(rq->ring);
 
        execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
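
An aside on the rewind above: ring offsets wrap at the ring size,
which is a power of two, so a minimal sketch of intel_ring_wrap()
could look as follows (assuming the usual mask-based definition; the
helper itself is not part of this patch):

	static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
	{
		/* Ring sizes are powers of two, so wrapping reduces to a mask */
		return pos & (ring->size - 1);
	}

Replaying from rq->head, rather than skipping ahead to rq->postfix as
before, keeps the semaphores and breadcrumbs in the ring intact (the
v3 point above); if the request was guilty, the reset has already
neutered its payload, so the replay completes harmlessly.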
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 9ebd9225684e8743f2b8db772de4c9d9f7f45465..d0b93a3fbc5455b43b09e0721f7dbe1f2ca3698e 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -144,6 +144,13 @@ igt_spinner_create_request(struct igt_spinner *spin,
 
        i915_gem_chipset_flush(spin->i915);
 
+       if (engine->emit_init_breadcrumb &&
+           rq->timeline->has_initial_breadcrumb) {
+               err = engine->emit_init_breadcrumb(rq);
+               if (err)
+                       goto cancel_rq;
+       }
+
        err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
 
 cancel_rq:
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 4886fac126289077f8e3c34ba91358cba9458180..92475596ff40ab6946eb321cd338c9f1a60ccd57 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -242,6 +242,12 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
        *batch++ = MI_BATCH_BUFFER_END; /* not reached */
        i915_gem_chipset_flush(h->i915);
 
+       if (rq->engine->emit_init_breadcrumb) {
+               err = rq->engine->emit_init_breadcrumb(rq);
+               if (err)
+                       goto cancel_rq;
+       }
+
        flags = 0;
        if (INTEL_GEN(vm->i915) <= 5)
                flags |= I915_DISPATCH_SECURE;
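
Both selftests now follow essentially the same pattern: emit the
initial breadcrumb, where the engine provides one, before jumping to
the batch, so that the deliberately hanging request reads as started
and can be blamed by the reset instead of being silently replayed. A
hypothetical helper capturing that pattern (the name is illustrative
only, not from the patch):

	static int igt_emit_init_breadcrumb(struct i915_request *rq)
	{
		/* Mark the payload as started so a reset may judge guilt */
		if (rq->engine->emit_init_breadcrumb &&
		    rq->timeline->has_initial_breadcrumb)
			return rq->engine->emit_init_breadcrumb(rq);

		return 0;
	}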