drm/i915/execlists: Reinitialise context image after GPU hang
authorChris Wilson <chris@chris-wilson.co.uk>
Tue, 4 Oct 2016 20:11:26 +0000 (21:11 +0100)
committerJani Nikula <jani.nikula@intel.com>
Mon, 10 Oct 2016 13:06:42 +0000 (16:06 +0300)
On Braswell, at least, we observe that the context image is written in
multiple phases. The first phase is to clear the register state, and
subsequently rewrite it. A GPU reset at the right moment can interrupt
the context update leaving it corrupt, and our update of the RING_HEAD
is not sufficient to restart the engine afterwards. To recover, we need
to reset the registers back to their original values. The context state
is lost. What we need is a better mechanism to serialise the reset with
pending flushes from the GPU.

Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161004201132.21801-2-chris@chris-wilson.co.uk
(cherry picked from commit a3aabe86a3406b9946a4f7707762a833a58dfe9c)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
drivers/gpu/drm/i915/intel_lrc.c

index 251143361f31e5bf83a3d4df810d222d657f8731..7da83330257d0495f9d7d583e178c5339b9d2b9b 100644 (file)
@@ -226,10 +226,16 @@ enum {
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 
+#define WA_TAIL_DWORDS 2
+
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
                                            struct intel_engine_cs *engine);
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
                                struct intel_engine_cs *engine);
+static void execlists_init_reg_state(u32 *reg_state,
+                                    struct i915_gem_context *ctx,
+                                    struct intel_engine_cs *engine,
+                                    struct intel_ring *ring);
 
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
@@ -708,7 +714,6 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 {
        struct intel_context *ce = &ctx->engine[engine->id];
        void *vaddr;
-       u32 *lrc_reg_state;
        int ret;
 
        lockdep_assert_held(&ctx->i915->drm.struct_mutex);
@@ -727,17 +732,16 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
                goto unpin_vma;
        }
 
-       lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
-
        ret = intel_ring_pin(ce->ring);
        if (ret)
                goto unpin_map;
 
        intel_lr_context_descriptor_update(ctx, engine);
 
-       lrc_reg_state[CTX_RING_BUFFER_START+1] =
+       ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+       ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
                i915_ggtt_offset(ce->ring->vma);
-       ce->lrc_reg_state = lrc_reg_state;
+
        ce->state->obj->dirty = true;
 
        /* Invalidate GuC TLB. */
@@ -1289,8 +1293,21 @@ static void reset_common_ring(struct intel_engine_cs *engine,
        struct execlist_port *port = engine->execlist_port;
        struct intel_context *ce = &request->ctx->engine[engine->id];
 
+       /* We want a simple context + ring to execute the breadcrumb update.
+        * We cannot rely on the context being intact across the GPU hang,
+        * so clear it and rebuild just what we need for the breadcrumb.
+        * All pending requests for this context will be zapped, and any
+        * future request will be after userspace has had the opportunity
+        * to recreate its own state.
+        */
+       execlists_init_reg_state(ce->lrc_reg_state,
+                                request->ctx, engine, ce->ring);
+
        /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
+       ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
+               i915_ggtt_offset(ce->ring->vma);
        ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+
        request->ring->head = request->postfix;
        request->ring->last_retired_head = -1;
        intel_ring_update_space(request->ring);
@@ -1310,6 +1327,9 @@ static void reset_common_ring(struct intel_engine_cs *engine,
        GEM_BUG_ON(request->ctx != port[0].request->ctx);
        port[0].count = 0;
        port[1].count = 0;
+
+       /* Reset WaIdleLiteRestore:bdw,skl as well */
+       request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
 }
 
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@@ -1547,7 +1567,6 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
  * used as a workaround for not being allowed to do lite
  * restore with HEAD==TAIL (WaIdleLiteRestore).
  */
-#define WA_TAIL_DWORDS 2
 
 static int gen8_emit_request(struct drm_i915_gem_request *request)
 {
@@ -1894,38 +1913,13 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
        return indirect_ctx_offset;
 }
 
-static int
-populate_lr_context(struct i915_gem_context *ctx,
-                   struct drm_i915_gem_object *ctx_obj,
-                   struct intel_engine_cs *engine,
-                   struct intel_ring *ring)
+static void execlists_init_reg_state(u32 *reg_state,
+                                    struct i915_gem_context *ctx,
+                                    struct intel_engine_cs *engine,
+                                    struct intel_ring *ring)
 {
-       struct drm_i915_private *dev_priv = ctx->i915;
-       struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
-       void *vaddr;
-       u32 *reg_state;
-       int ret;
-
-       if (!ppgtt)
-               ppgtt = dev_priv->mm.aliasing_ppgtt;
-
-       ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
-       if (ret) {
-               DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
-               return ret;
-       }
-
-       vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
-       if (IS_ERR(vaddr)) {
-               ret = PTR_ERR(vaddr);
-               DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
-               return ret;
-       }
-       ctx_obj->dirty = true;
-
-       /* The second page of the context object contains some fields which must
-        * be set up prior to the first execution. */
-       reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+       struct drm_i915_private *dev_priv = engine->i915;
+       struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
 
        /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
         * commands followed by (reg, value) pairs. The values we are setting here are
@@ -1939,14 +1933,11 @@ populate_lr_context(struct i915_gem_context *ctx,
                       _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
                                          CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
                                          (HAS_RESOURCE_STREAMER(dev_priv) ?
-                                           CTX_CTRL_RS_CTX_ENABLE : 0)));
+                                          CTX_CTRL_RS_CTX_ENABLE : 0)));
        ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base),
                       0);
        ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base),
                       0);
-       /* Ring buffer start address is not known until the buffer is pinned.
-        * It is written to the context image in execlists_update_context()
-        */
        ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START,
                       RING_START(engine->mmio_base), 0);
        ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL,
@@ -2029,6 +2020,36 @@ populate_lr_context(struct i915_gem_context *ctx,
                ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
                               make_rpcs(dev_priv));
        }
+}
+
+static int
+populate_lr_context(struct i915_gem_context *ctx,
+                   struct drm_i915_gem_object *ctx_obj,
+                   struct intel_engine_cs *engine,
+                   struct intel_ring *ring)
+{
+       void *vaddr;
+       int ret;
+
+       ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
+       if (ret) {
+               DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
+               return ret;
+       }
+
+       vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
+       if (IS_ERR(vaddr)) {
+               ret = PTR_ERR(vaddr);
+               DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
+               return ret;
+       }
+       ctx_obj->dirty = true;
+
+       /* The second page of the context object contains some fields which must
+        * be set up prior to the first execution. */
+
+       execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+                                ctx, engine, ring);
 
        i915_gem_object_unpin_map(ctx_obj);