drm/i915/ringbuffer: Fix context restore upon reset
authorChris Wilson <chris@chris-wilson.co.uk>
Mon, 11 Jun 2018 11:08:44 +0000 (12:08 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Mon, 11 Jun 2018 13:03:47 +0000 (14:03 +0100)
The discovery while trying to enable full-ppgtt was that we were
completely failing to load both the mm and context following the
reset. Although we were performing mmio writes to set the PP_DIR
(per-process GTT) and CCID (context), these were taking no effect (the
assumption was that they would trigger a reload of the context and
restore the page tables). Not until we performed the LRI +
MI_SET_CONTEXT in a following context switch did anything occur.
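
As an illustration of the mechanism, here is a minimal user-space
sketch of the LRI emission that load_pd_dir() below performs from the
command streamer. It is a sketch only: the opcode encoding and
register offsets are simplified stand-ins, not the driver's
definitions from i915_reg.h.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins -- the real encodings live in i915_reg.h and
 * friends; do not take these values literally. */
#define MI_LOAD_REGISTER_IMM(n)	(((uint32_t)0x22 << 23) | (2 * (n) - 1))
#define RING_PP_DIR_DCLV	0x2220u	/* hypothetical RCS offset */
#define RING_PP_DIR_BASE	0x2228u	/* hypothetical RCS offset */
#define PP_DIR_DCLV_2G		0xffffffffu

/* Emit one LRI per register, as load_pd_dir() does, so that the
 * command streamer itself reloads the page-directory registers; this
 * is what makes the write take effect where the post-reset mmio poke
 * did not. */
static uint32_t *emit_load_pd_dir(uint32_t *cs, uint32_t ggtt_offset)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = RING_PP_DIR_DCLV;
	*cs++ = PP_DIR_DCLV_2G;

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = RING_PP_DIR_BASE;
	*cs++ = ggtt_offset << 10; /* pd offset in the upper bits, as below */

	return cs;
}

int main(void)
{
	uint32_t ring[6], *end = emit_load_pd_dir(ring, 0x42);

	for (uint32_t *cs = ring; cs != end; cs++)
		printf("0x%08x\n", *cs);

	return 0;
}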

Since we are then required to reset the context image and PP_DIR using
CS commands, we place those commands into every batch. The hardware
should recognise the no-ops and eliminate the expensive context loads,
but we still have to pay the cost of using cross-powerwell register
writes. In practice, this has no effect on actual context switch times,
and only adds a few hundred nanoseconds to no-op switches. We can
improve the latter by eliminating the workaround around known no-op
switches, but there is an ulterior motive to keeping them.
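
Since those commands now form a fixed header at the start of every
request, a reset only needs to know where the header ends in order to
cancel a guilty batch without losing the context/mm reload; that is
what the new rq->infix marks. Below is a toy, user-space model of the
skip_request() hunk in this patch (the ring size and payload values
are made up; the driver works in bytes via memset32(), this sketch in
dwords):

#include <stdint.h>
#include <stdio.h>

#define MI_NOOP		0u
#define RING_SIZE	16	/* toy ring; a power of two, as in the driver */

struct toy_request {
	uint32_t ring[RING_SIZE];
	uint32_t infix;		/* end of header, start of user payload */
	uint32_t postfix;	/* start of the breadcrumb */
};

/* Overwrite only the user payload [infix, postfix) with MI_NOOP,
 * handling the wrap, so the header that restores the context and
 * PP_DIR still executes on replay. */
static void toy_skip_request(struct toy_request *rq)
{
	uint32_t head = rq->infix;

	if (rq->postfix < head) {	/* payload wraps past the end */
		for (uint32_t i = head; i < RING_SIZE; i++)
			rq->ring[i] = MI_NOOP;
		head = 0;
	}
	for (uint32_t i = head; i < rq->postfix; i++)
		rq->ring[i] = MI_NOOP;
}

int main(void)
{
	struct toy_request rq = { .infix = 14, .postfix = 4 };

	for (int i = 0; i < RING_SIZE; i++)
		rq.ring[i] = 0xdeadbeef;	/* pretend contents */

	toy_skip_request(&rq);	/* NOOPs dwords 14, 15, then 0..3 only */

	for (int i = 0; i < RING_SIZE; i++)
		printf("%2d: 0x%08x\n", i, rq.ring[i]);

	return 0;
}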

Always emitting the context switch at the beginning of the request (and
relying on HW to skip unneeded switches) does have one key advantage.
Should we implement request reordering on Haswell, we will not know in
advance which context was previously executing on the GPU, and so we
would not be able to elide the MI_SET_CONTEXT commands ourselves and
would always have to emit them. Having our hand forced now actually
prepares us for later.

Now that the context and mm follow the request, we no longer (and have
not for a long time, since requests took over!) require a tracepoint
to tell us when we write the switch into the ring, since it now
happens for every request. (This is even more important when you
remember that simply writing into the ring bears no relation to the
current mm.)

v2: Sandybridge has to agree to use LRI as well.

Testcase: igt/drv_selftests/live_hangcheck
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180611110845.31890-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_gem_gtt.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/i915_trace.h
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index 7ccfdbc8f9b43c7188f30bfd5d108c9021585a64..ac75e0c5735c69f486023903691e4d11899a13e9 100644 (file)
@@ -1712,45 +1712,6 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
        wmb();
 }
 
-static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
-{
-       GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
-       return ppgtt->pd.base.ggtt_offset << 10;
-}
-
-static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                         struct i915_request *rq)
-{
-       struct intel_engine_cs *engine = rq->engine;
-       u32 *cs;
-
-       /* NB: TLBs must be flushed and invalidated before a switch */
-       cs = intel_ring_begin(rq, 6);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       *cs++ = MI_LOAD_REGISTER_IMM(2);
-       *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
-       *cs++ = PP_DIR_DCLV_2G;
-       *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
-       *cs++ = get_pd_offset(ppgtt);
-       *cs++ = MI_NOOP;
-       intel_ring_advance(rq, cs);
-
-       return 0;
-}
-
-static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
-                         struct i915_request *rq)
-{
-       struct intel_engine_cs *engine = rq->engine;
-       struct drm_i915_private *dev_priv = rq->i915;
-
-       I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
-       I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
-       return 0;
-}
-
 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
@@ -2024,12 +1985,6 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
        ppgtt->vm.dma = &i915->drm.pdev->dev;
 
        ppgtt->vm.pte_encode = ggtt->vm.pte_encode;
-       if (IS_GEN6(i915))
-               ppgtt->switch_mm = gen6_mm_switch;
-       else if (IS_GEN7(i915))
-               ppgtt->switch_mm = gen7_mm_switch;
-       else
-               BUG();
 
        err = gen6_ppgtt_alloc(ppgtt);
        if (err)
index 16307ba7e30352e932d6ee891b65d602506409a1..e70f6abcd0f28f3be6ceed8cac766d9a435bdd87 100644 (file)
@@ -406,8 +406,6 @@ struct i915_hw_ppgtt {
 
        gen6_pte_t __iomem *pd_addr;
 
-       int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
-                        struct i915_request *rq);
        void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
 };
 
index f187250e60c6f56bd1f349284f00be9478fe8073..9092f5464c24d7e5914a4485cfb051748bac790b 100644 (file)
@@ -817,6 +817,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
        /* Keep a second pin for the dual retirement along engine and ring */
        __intel_context_pin(ce);
 
+       rq->infix = rq->ring->emit; /* end of header; start of user payload */
+
        /* Check that we didn't interrupt ourselves with a new request */
        GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
        return rq;
index 491ff81d0fea78c136224ed221ef2be3a8b31b42..0e9aba53d0e423528d2e9759c31118ba9c227aae 100644 (file)
@@ -134,6 +134,9 @@ struct i915_request {
        /** Position in the ring of the start of the request */
        u32 head;
 
+       /** Position in the ring of the start of the user packets */
+       u32 infix;
+
        /**
         * Position in the ring of the start of the postfix.
         * This is required to calculate the maximum available ring space
index 1472f48ab2e804ca9404155b8004d18237b657ec..b50c6b829715e220c9f3edede3dfa0e83497a804 100644 (file)
@@ -973,39 +973,6 @@ DEFINE_EVENT(i915_context, i915_context_free,
        TP_ARGS(ctx)
 );
 
-/**
- * DOC: switch_mm tracepoint
- *
- * This tracepoint allows tracking of the mm switch, which is an important point
- * in the lifetime of the vm in the legacy submission path. This tracepoint is
- * called only if full ppgtt is enabled.
- */
-TRACE_EVENT(switch_mm,
-       TP_PROTO(struct intel_engine_cs *engine, struct i915_gem_context *to),
-
-       TP_ARGS(engine, to),
-
-       TP_STRUCT__entry(
-                       __field(u16, class)
-                       __field(u16, instance)
-                       __field(struct i915_gem_context *, to)
-                       __field(struct i915_address_space *, vm)
-                       __field(u32, dev)
-       ),
-
-       TP_fast_assign(
-                       __entry->class = engine->uabi_class;
-                       __entry->instance = engine->instance;
-                       __entry->to = to;
-                       __entry->vm = to->ppgtt ? &to->ppgtt->vm : NULL;
-                       __entry->dev = engine->i915->drm.primary->index;
-       ),
-
-       TP_printk("dev=%u, engine=%u:%u, ctx=%p, ctx_vm=%p",
-                 __entry->dev, __entry->class, __entry->instance, __entry->to,
-                 __entry->vm)
-);
-
 #endif /* _I915_TRACE_H_ */
 
 /* This part must be outside protection */
index 2ec2e60dc670711e2593994d5b58773c32733e06..d1cf8b4926abe2f73644d179c1e52a43520bf7d6 100644 (file)
@@ -1168,9 +1168,6 @@ void intel_engine_lost_context(struct intel_engine_cs *engine)
 
        lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
-       engine->legacy_active_context = NULL;
-       engine->legacy_active_ppgtt = NULL;
-
        ce = fetch_and_zero(&engine->last_retired_context);
        if (ce)
                intel_context_unpin(ce);
index 5bc53a5f4504e5f8e8b2e6756acc79c95d435280..d72a6a5ff3acbf11974c8a8f211f015492911f25 100644 (file)
@@ -541,11 +541,23 @@ static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
        return i915_gem_find_active_request(engine);
 }
 
-static void reset_ring(struct intel_engine_cs *engine,
-                      struct i915_request *request)
+static void skip_request(struct i915_request *rq)
 {
-       GEM_TRACE("%s seqno=%x\n",
-                 engine->name, request ? request->global_seqno : 0);
+       void *vaddr = rq->ring->vaddr;
+       u32 head;
+
+       head = rq->infix;
+       if (rq->postfix < head) {
+               memset32(vaddr + head, MI_NOOP,
+                        (rq->ring->size - head) / sizeof(u32));
+               head = 0;
+       }
+       memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
+}
+
+static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+       GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0);
 
        /*
         * RC6 must be prevented until the reset is complete and the engine
@@ -569,43 +581,11 @@ static void reset_ring(struct intel_engine_cs *engine,
         * If the request was innocent, we try to replay the request with
         * the restored context.
         */
-       if (request) {
-               struct drm_i915_private *dev_priv = request->i915;
-               struct intel_context *ce = request->hw_context;
-               struct i915_hw_ppgtt *ppgtt;
-
-               if (ce->state) {
-                       I915_WRITE(CCID,
-                                  i915_ggtt_offset(ce->state) |
-                                  BIT(8) /* must be set! */ |
-                                  CCID_EXTENDED_STATE_SAVE |
-                                  CCID_EXTENDED_STATE_RESTORE |
-                                  CCID_EN);
-               }
-
-               ppgtt = request->gem_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
-               if (ppgtt) {
-                       u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
-
-                       I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
-                       I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);
-
-                       /* Wait for the PD reload to complete */
-                       if (intel_wait_for_register(dev_priv,
-                                                   RING_PP_DIR_BASE(engine),
-                                                   BIT(0), 0,
-                                                   10))
-                               DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n");
-
-                       ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
-               }
-
+       if (rq) {
                /* If the rq hung, jump to its breadcrumb and skip the batch */
-               if (request->fence.error == -EIO)
-                       request->ring->head = request->postfix;
-       } else {
-               engine->legacy_active_context = NULL;
-               engine->legacy_active_ppgtt = NULL;
+               rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
+               if (rq->fence.error == -EIO)
+                       skip_request(rq);
        }
 }
 
@@ -1446,6 +1426,29 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
                intel_ring_reset(engine->buffer, 0);
 }
 
+static int load_pd_dir(struct i915_request *rq,
+                      const struct i915_hw_ppgtt *ppgtt)
+{
+       const struct intel_engine_cs * const engine = rq->engine;
+       u32 *cs;
+
+       cs = intel_ring_begin(rq, 6);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       *cs++ = MI_LOAD_REGISTER_IMM(1);
+       *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
+       *cs++ = PP_DIR_DCLV_2G;
+
+       *cs++ = MI_LOAD_REGISTER_IMM(1);
+       *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
+       *cs++ = ppgtt->pd.base.ggtt_offset << 10;
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
 static inline int mi_set_context(struct i915_request *rq, u32 flags)
 {
        struct drm_i915_private *i915 = rq->i915;
@@ -1590,31 +1593,28 @@ static int remap_l3(struct i915_request *rq, int slice)
 static int switch_context(struct i915_request *rq)
 {
        struct intel_engine_cs *engine = rq->engine;
-       struct i915_gem_context *to_ctx = rq->gem_context;
-       struct i915_hw_ppgtt *to_mm =
-               to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
-       struct i915_gem_context *from_ctx = engine->legacy_active_context;
-       struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt;
+       struct i915_gem_context *ctx = rq->gem_context;
+       struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+       unsigned int unwind_mm = 0;
        u32 hw_flags = 0;
        int ret, i;
 
        lockdep_assert_held(&rq->i915->drm.struct_mutex);
        GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-       if (to_mm != from_mm ||
-           (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) {
-               trace_switch_mm(engine, to_ctx);
-               ret = to_mm->switch_mm(to_mm, rq);
+       if (ppgtt) {
+               ret = load_pd_dir(rq, ppgtt);
                if (ret)
                        goto err;
 
-               to_mm->pd_dirty_rings &= ~intel_engine_flag(engine);
-               engine->legacy_active_ppgtt = to_mm;
-               hw_flags = MI_FORCE_RESTORE;
+               if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) {
+                       unwind_mm = intel_engine_flag(engine);
+                       ppgtt->pd_dirty_rings &= ~unwind_mm;
+                       hw_flags = MI_FORCE_RESTORE;
+               }
        }
 
-       if (rq->hw_context->state &&
-           (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) {
+       if (rq->hw_context->state) {
                GEM_BUG_ON(engine->id != RCS);
 
                /*
@@ -1624,35 +1624,32 @@ static int switch_context(struct i915_request *rq)
                 * as nothing actually executes using the kernel context; it
                 * is purely used for flushing user contexts.
                 */
-               if (i915_gem_context_is_kernel(to_ctx))
+               if (i915_gem_context_is_kernel(ctx))
                        hw_flags = MI_RESTORE_INHIBIT;
 
                ret = mi_set_context(rq, hw_flags);
                if (ret)
                        goto err_mm;
-
-               engine->legacy_active_context = to_ctx;
        }
 
-       if (to_ctx->remap_slice) {
+       if (ctx->remap_slice) {
                for (i = 0; i < MAX_L3_SLICES; i++) {
-                       if (!(to_ctx->remap_slice & BIT(i)))
+                       if (!(ctx->remap_slice & BIT(i)))
                                continue;
 
                        ret = remap_l3(rq, i);
                        if (ret)
-                               goto err_ctx;
+                               goto err_mm;
                }
 
-               to_ctx->remap_slice = 0;
+               ctx->remap_slice = 0;
        }
 
        return 0;
 
-err_ctx:
-       engine->legacy_active_context = from_ctx;
 err_mm:
-       engine->legacy_active_ppgtt = from_mm;
+       if (unwind_mm)
+               ppgtt->pd_dirty_rings |= unwind_mm;
 err:
        return ret;
 }
index acef385c4c80ddf293e898f715d504454b7ea177..b44c678497494c4a7cc763f7d662e03751193d90 100644 (file)
@@ -557,15 +557,6 @@ struct intel_engine_cs {
         */
        struct intel_context *last_retired_context;
 
-       /* We track the current MI_SET_CONTEXT in order to eliminate
-        * redudant context switches. This presumes that requests are not
-        * reordered! Or when they are the tracking is updated along with
-        * the emission of individual requests into the legacy command
-        * stream (ring).
-        */
-       struct i915_gem_context *legacy_active_context;
-       struct i915_hw_ppgtt *legacy_active_ppgtt;
-
        /* status_notifier: list of callbacks for context-switch changes */
        struct atomic_notifier_head context_status_notifier;