trace_i915_request_out(rq);
}
-static void
-execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
-{
- ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
- ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
-}
-
static u64 execlists_update_context(struct i915_request *rq)
{
- struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
struct intel_context *ce = rq->hw_context;
- u32 *reg_state = ce->lrc_reg_state;
- reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
-
- /*
- * True 32b PPGTT with dynamic page allocation: update PDP
- * registers and point the unallocated PDPs to scratch page.
- * PML4 is allocated during ppgtt init, so this is not needed
- * in 48-bit mode.
- */
- if (!i915_vm_is_48bit(&ppgtt->vm))
- execlists_update_context_pdps(ppgtt, reg_state);
+ ce->lrc_reg_state[CTX_RING_TAIL + 1] =
+ intel_ring_set_tail(rq->ring, rq->tail);
/*
* Make sure the context image is complete before we submit it to HW.
return __execlists_context_pin(engine, ctx, ce);
}
+static int emit_pdps(struct i915_request *rq)
+{
+ const struct intel_engine_cs * const engine = rq->engine;
+ struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
+ int err, i;
+ u32 *cs;
+
+ GEM_BUG_ON(intel_vgpu_active(rq->i915));
+
+ /*
+ * Beware ye of the dragons, this sequence is magic!
+ *
+ * Small changes to this sequence can cause anything from
+ * GPU hangs to forcewake errors and machine lockups!
+ */
+
+ /* Flush any residual operations from the context load */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Magic required to prevent forcewake errors! */
+ err = engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ return err;
+
+ cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Ensure the LRI have landed before we invalidate & continue */
+ *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ for (i = GEN8_3LVL_PDPES; i--; ) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = upper_32_bits(pd_daddr);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = lower_32_bits(pd_daddr);
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ /* Be doubly sure the LRI have landed before proceeding */
+ err = engine->emit_flush(rq, EMIT_FLUSH);
+ if (err)
+ return err;
+
+ /* Re-invalidate the TLB for luck */
+ return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
static int execlists_request_alloc(struct i915_request *request)
{
int ret;
*/
request->reserved_space += EXECLISTS_REQUEST_SIZE;
- /* Unconditionally invalidate GPU caches and TLBs. */
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
- if (ret)
- return ret;
-
/*
* Note that after this point, we have committed to using
* this request as it is being used to both track the
* to cancel/unwind this request now.
*/
+ /* Unconditionally invalidate GPU caches and TLBs. */
+ if (i915_vm_is_48bit(&request->gem_context->ppgtt->vm))
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+ else
+ ret = emit_pdps(request);
+ if (ret)
+ return ret;
+
request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0;
}
atomic_read(&execlists->tasklet.count));
}
-static int intel_logical_ring_emit_pdps(struct i915_request *rq)
-{
- struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
- struct intel_engine_cs *engine = rq->engine;
- const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
- u32 *cs;
- int i;
-
- cs = intel_ring_begin(rq, num_lri_cmds * 2 + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds);
- for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
- const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
- *cs++ = lower_32_bits(pd_daddr);
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
u32 *cs;
- int ret;
-
- /* Don't rely in hw updating PDPs, specially in lite-restore.
- * Ideally, we should set Force PD Restore in ctx descriptor,
- * but we can't. Force Restore would be a second option, but
- * it is unsafe in case of lite-restore (because the ctx is
- * not idle). PML4 is allocated during ppgtt init so this is
- * not needed in 48-bit.*/
- if ((intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
- !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) &&
- !intel_vgpu_active(rq->i915)) {
- ret = intel_logical_ring_emit_pdps(rq);
- if (ret)
- return ret;
-
- rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
- }
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
*cs++ = MI_NOOP;
+
intel_ring_advance(rq, cs);
return 0;
* other PDP Descriptors are ignored.
*/
ASSIGN_CTX_PML4(ctx->ppgtt, regs);
+ } else {
+ ASSIGN_CTX_PDP(ctx->ppgtt, regs, 3);
+ ASSIGN_CTX_PDP(ctx->ppgtt, regs, 2);
+ ASSIGN_CTX_PDP(ctx->ppgtt, regs, 1);
+ ASSIGN_CTX_PDP(ctx->ppgtt, regs, 0);
}
if (rcs) {