drm/i915: Pipeline PDP updates for Braswell

author Chris Wilson <chris@chris-wilson.co.uk>

Fri, 7 Dec 2018 09:02:13 +0000 (09:02 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Fri, 7 Dec 2018 12:13:02 +0000 (12:13 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 7 Dec 2018 09:02:13 +0000 (09:02 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 7 Dec 2018 12:13:02 +0000 (12:13 +0000)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c

index add1fe7aeb930f2e21e73d12dbf96f095decc5be..62bde517d383af137ab19780102929cb86132021 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1423,8 +1423,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
                         gen8_initialize_pd(vm, pd);
                         gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
                         GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
-
-                       mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
                 }
  
                 ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index a9084a54c0f4caffb757f74b2c033321d63d039c..ae7e5b288ed59f76c1567992e4bdb54d6e2ef288 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -363,31 +363,12 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
         trace_i915_request_out(rq);
  }
  
-static void
-execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
-{
-       ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
-       ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
-       ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
-       ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
-}
-
  static u64 execlists_update_context(struct i915_request *rq)
  {
-       struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
         struct intel_context *ce = rq->hw_context;
-       u32 *reg_state = ce->lrc_reg_state;
  
-       reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
-
-       /*
-        * True 32b PPGTT with dynamic page allocation: update PDP
-        * registers and point the unallocated PDPs to scratch page.
-        * PML4 is allocated during ppgtt init, so this is not needed
-        * in 48-bit mode.
-        */
-       if (!i915_vm_is_48bit(&ppgtt->vm))
-               execlists_update_context_pdps(ppgtt, reg_state);
+       ce->lrc_reg_state[CTX_RING_TAIL + 1] =
+               intel_ring_set_tail(rq->ring, rq->tail);
  
         /*
          * Make sure the context image is complete before we submit it to HW.
@@ -1270,6 +1251,59 @@ execlists_context_pin(struct intel_engine_cs *engine,
         return __execlists_context_pin(engine, ctx, ce);
  }
  
+static int emit_pdps(struct i915_request *rq)
+{
+       const struct intel_engine_cs * const engine = rq->engine;
+       struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
+       int err, i;
+       u32 *cs;
+
+       GEM_BUG_ON(intel_vgpu_active(rq->i915));
+
+       /*
+        * Beware ye of the dragons, this sequence is magic!
+        * Emitted order: flush, invalidate, LRI of every PDP, flush, invalidate.
+        * Small changes to this sequence can cause anything from
+        * GPU hangs to forcewake errors and machine lockups!
+        */
+
+       /* Flush any residual operations from the context load */
+       err = engine->emit_flush(rq, EMIT_FLUSH);
+       if (err)
+               return err;
+
+       /* Magic required to prevent forcewake errors! */
+       err = engine->emit_flush(rq, EMIT_INVALIDATE);
+       if (err)
+               return err;
+
+       cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       /* Ensure the LRI have landed before we invalidate & continue */
+       *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+       for (i = GEN8_3LVL_PDPES; i--; ) { /* highest PDP index first, down to 0 */
+               const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+               *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+               *cs++ = upper_32_bits(pd_daddr);
+               *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+               *cs++ = lower_32_bits(pd_daddr);
+       }
+       *cs++ = MI_NOOP; /* last of the 4 * GEN8_3LVL_PDPES + 2 dwords requested from intel_ring_begin() */
+
+       intel_ring_advance(rq, cs);
+
+       /* Be doubly sure the LRI have landed before proceeding */
+       err = engine->emit_flush(rq, EMIT_FLUSH);
+       if (err)
+               return err;
+
+       /* Re-invalidate the TLB for luck */
+       return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
  static int execlists_request_alloc(struct i915_request *request)
  {
         int ret;
@@ -1283,11 +1317,6 @@ static int execlists_request_alloc(struct i915_request *request)
          */
         request->reserved_space += EXECLISTS_REQUEST_SIZE;
  
-       /* Unconditionally invalidate GPU caches and TLBs. */
-       ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
-       if (ret)
-               return ret;
-
         /*
          * Note that after this point, we have committed to using
          * this request as it is being used to both track the
@@ -1296,6 +1325,14 @@ static int execlists_request_alloc(struct i915_request *request)
          * to cancel/unwind this request now.
          */
  
+       /* Unconditionally invalidate GPU caches and TLBs. */
+       if (i915_vm_is_48bit(&request->gem_context->ppgtt->vm))
+               ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+       else
+               ret = emit_pdps(request);
+       if (ret)
+               return ret;
+
         request->reserved_space -= EXECLISTS_REQUEST_SIZE;
         return 0;
  }
@@ -1831,56 +1868,11 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
                   atomic_read(&execlists->tasklet.count));
  }
  
-static int intel_logical_ring_emit_pdps(struct i915_request *rq)
-{
-       struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
-       struct intel_engine_cs *engine = rq->engine;
-       const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
-       u32 *cs;
-       int i;
-
-       cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds);
-       for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
-               const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-
-               *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
-               *cs++ = upper_32_bits(pd_daddr);
-               *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
-               *cs++ = lower_32_bits(pd_daddr);
-       }
-
-       *cs++ = MI_NOOP;
-       intel_ring_advance(rq, cs);
-
-       return 0;
-}
-
  static int gen8_emit_bb_start(struct i915_request *rq,
                               u64 offset, u32 len,
                               const unsigned int flags)
  {
         u32 *cs;
-       int ret;
-
-       /* Don't rely in hw updating PDPs, specially in lite-restore.
-        * Ideally, we should set Force PD Restore in ctx descriptor,
-        * but we can't. Force Restore would be a second option, but
-        * it is unsafe in case of lite-restore (because the ctx is
-        * not idle). PML4 is allocated during ppgtt init so this is
-        * not needed in 48-bit.*/
-       if ((intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
-           !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) &&
-           !intel_vgpu_active(rq->i915)) {
-               ret = intel_logical_ring_emit_pdps(rq);
-               if (ret)
-                       return ret;
-
-               rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
-       }
  
         cs = intel_ring_begin(rq, 6);
         if (IS_ERR(cs))
@@ -1913,6 +1905,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
  
         *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
         *cs++ = MI_NOOP;
+
         intel_ring_advance(rq, cs);
  
         return 0;
@@ -2524,6 +2517,11 @@ static void execlists_init_reg_state(u32 *regs,
                  * other PDP Descriptors are ignored.
                  */
                 ASSIGN_CTX_PML4(ctx->ppgtt, regs);
+       } else {
+               ASSIGN_CTX_PDP(ctx->ppgtt, regs, 3);
+               ASSIGN_CTX_PDP(ctx->ppgtt, regs, 2);
+               ASSIGN_CTX_PDP(ctx->ppgtt, regs, 1);
+               ASSIGN_CTX_PDP(ctx->ppgtt, regs, 0);
         }
  
         if (rcs) {
author	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 7 Dec 2018 09:02:13 +0000 (09:02 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 7 Dec 2018 12:13:02 +0000 (12:13 +0000)
drivers/gpu/drm/i915/i915_gem_gtt.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_lrc.c		patch \| blob \| history