container_of(fence, typeof(*request), submit);
struct intel_engine_cs *engine = request->engine;
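+ /* Called for both FENCE_COMPLETE and FENCE_FREE; only completion
+ * requires any action from us.
+ */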
+ if (state != FENCE_COMPLETE)
+ return NOTIFY_DONE;
+
/* Will be called from irq-context when using foreign DMA fences */
- switch (state) {
- case FENCE_COMPLETE:
- engine->timeline->last_submitted_seqno = request->fence.seqno;
- engine->submit_request(request);
- break;
+ engine->timeline->last_submitted_seqno = request->fence.seqno;
- case FENCE_FREE:
- break;
- }
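+ /* The breadcrumb was only reserved at request construction; now that
+ * the request is actually being submitted, write it into the ring at
+ * the recorded postfix offset.
+ */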
+ engine->emit_breadcrumb(request,
+ request->ring->vaddr + request->postfix);
+ engine->submit_request(request);
return NOTIFY_DONE;
}
struct intel_ring *ring = request->ring;
struct intel_timeline *timeline = request->timeline;
struct drm_i915_gem_request *prev;
- u32 request_start;
- u32 reserved_tail;
- int ret;
+ int err;
lockdep_assert_held(&request->i915->drm.struct_mutex);
trace_i915_gem_request_add(request);
* should already have been reserved in the ring buffer. Let the ring
* know that it is time to use that space up.
*/
- request_start = ring->tail;
- reserved_tail = request->reserved_space;
request->reserved_space = 0;
/*
* what.
*/
if (flush_caches) {
- ret = engine->emit_flush(request, EMIT_FLUSH);
+ err = engine->emit_flush(request, EMIT_FLUSH);
/* Not allowed to fail! */
- WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
+ WARN(err, "engine->emit_flush() failed: %d!\n", err);
}
/* Record the position of the start of the breadcrumb so that
* GPU processing the request, we never over-estimate the
* position of the ring's HEAD.
*/
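+ /* intel_ring_begin() cannot fail here: space for the breadcrumb was
+ * set aside via request->reserved_space when the request was
+ * allocated.
+ */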
+ err = intel_ring_begin(request, engine->emit_breadcrumb_sz);
+ GEM_BUG_ON(err);
request->postfix = ring->tail;
-
- /* Not allowed to fail! */
- ret = engine->emit_breadcrumb(request);
- WARN(ret, "(%s)->emit_breadcrumb failed: %d!\n", engine->name, ret);
-
- /* Sanity check that the reserved size was large enough. */
- ret = ring->tail - request_start;
- if (ret < 0)
- ret += ring->size;
- WARN_ONCE(ret > reserved_tail,
- "Not enough space reserved (%d bytes) "
- "for adding the request (%d bytes)\n",
- reserved_tail, ret);
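+ /* Skip over the space set aside for the breadcrumb; the dwords
+ * themselves are written by engine->emit_breadcrumb() from the
+ * submit notifier.
+ */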
+ ring->tail += engine->emit_breadcrumb_sz * sizeof(u32);
/* Seal the request and mark it as pending execution. Note that
* we may inspect this state, without holding any locks, during
struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
u32 *reg_state = ce->lrc_reg_state;
- reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
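+ /* rq->tail is already stored as an offset into the ring (see
+ * emit_breadcrumb), so it can be written into the context image
+ * directly.
+ */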
+ reg_state[CTX_RING_TAIL+1] = rq->tail;
/* True 32b PPGTT with dynamic page allocation: update PDP
* registers and point the unallocated PDPs to scratch page.
spin_lock_irqsave(&engine->execlist_lock, flags);
+ /* We keep the previous context alive until we retire the following
+ * request. This ensures that the context object is still pinned
+ * for any residual writes the HW makes into it on the context switch
+ * into the next object following the breadcrumb. Otherwise, we may
+ * retire the context too early.
+ */
+ request->previous_context = engine->last_context;
+ engine->last_context = request->ctx;
+
list_add_tail(&request->execlist_link, &engine->execlist_queue);
if (execlists_elsp_idle(engine))
tasklet_hi_schedule(&engine->irq_tasklet);
return ret;
}
-/*
- * intel_logical_ring_advance() - advance the tail and prepare for submission
- * @request: Request to advance the logical ringbuffer of.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-static int
-intel_logical_ring_advance(struct drm_i915_gem_request *request)
-{
- struct intel_ring *ring = request->ring;
- struct intel_engine_cs *engine = request->engine;
-
- intel_ring_advance(ring);
- request->tail = ring->tail;
-
- /*
- * Here we add two extra NOOPs as padding to avoid
- * lite restore of a context with HEAD==TAIL.
- *
- * Caller must reserve WA_TAIL_DWORDS for us!
- */
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
- request->wa_tail = ring->tail;
-
- /* We keep the previous context alive until we retire the following
- * request. This ensures that any the context object is still pinned
- * for any residual writes the HW makes into it on the context switch
- * into the next object following the breadcrumb. Otherwise, we may
- * retire the context too early.
- */
- request->previous_context = engine->last_context;
- engine->last_context = request->ctx;
- return 0;
-}
-
static int intel_lr_context_pin(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
* used as a workaround for not being allowed to do lite
* restore with HEAD==TAIL (WaIdleLiteRestore).
*/
-
-static int gen8_emit_breadcrumb(struct drm_i915_gem_request *request)
+static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out)
{
- struct intel_ring *ring = request->ring;
- int ret;
-
- ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS);
- if (ret)
- return ret;
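+ /* These NOOPs sit beyond request->tail and are recorded as wa_tail;
+ * resubmitting the same context uses wa_tail so that TAIL always
+ * advances, avoiding a lite restore with HEAD == TAIL.
+ */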
+ *out++ = MI_NOOP;
+ *out++ = MI_NOOP;
+ request->wa_tail = intel_ring_offset(request->ring, out);
+}
+static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request,
+ u32 *out)
+{
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
- intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
- intel_ring_emit(ring,
- intel_hws_seqno_address(request->engine) |
- MI_FLUSH_DW_USE_GTT);
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, request->global_seqno);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_emit(ring, MI_NOOP);
- return intel_logical_ring_advance(request);
+ *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ *out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
+ *out++ = 0;
+ *out++ = request->global_seqno;
+ *out++ = MI_USER_INTERRUPT;
+ *out++ = MI_NOOP;
+ request->tail = intel_ring_offset(request->ring, out);
+
+ gen8_emit_wa_tail(request, out);
}
+static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
-static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request)
+static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
+ u32 *out)
{
- struct intel_ring *ring = request->ring;
- int ret;
-
- ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS);
- if (ret)
- return ret;
-
/* We're using qword write, seqno should be aligned to 8 bytes. */
BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
* need a prior CS_STALL, which is emitted by the flush
* following the batch.
*/
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
- intel_ring_emit(ring,
- (PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE));
- intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, request->global_seqno);
+ *out++ = GFX_OP_PIPE_CONTROL(6);
+ *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE);
+ *out++ = intel_hws_seqno_address(request->engine);
+ *out++ = 0;
+ *out++ = request->global_seqno;
/* We're thrashing one dword of HWS. */
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_emit(ring, MI_NOOP);
- return intel_logical_ring_advance(request);
+ *out++ = 0;
+ *out++ = MI_USER_INTERRUPT;
+ *out++ = MI_NOOP;
+ request->tail = intel_ring_offset(request->ring, out);
+
+ gen8_emit_wa_tail(request, out);
}
+static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
i915_vma_unpin_and_release(&dev_priv->semaphore);
}
-static int gen8_rcs_signal(struct drm_i915_gem_request *req)
+static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out)
{
- struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = req->i915;
struct intel_engine_cs *waiter;
enum intel_engine_id id;
- int ret, num_rings;
-
- num_rings = INTEL_INFO(dev_priv)->num_rings;
- ret = intel_ring_begin(req, (num_rings-1) * 8);
- if (ret)
- return ret;
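+ /* No local reservation here: the space for these dwords is already
+ * accounted for in the engine's emit_breadcrumb_sz.
+ */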
for_each_engine(waiter, dev_priv, id) {
u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
continue;
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
- intel_ring_emit(ring,
- PIPE_CONTROL_GLOBAL_GTT_IVB |
- PIPE_CONTROL_QW_WRITE |
- PIPE_CONTROL_CS_STALL);
- intel_ring_emit(ring, lower_32_bits(gtt_offset));
- intel_ring_emit(ring, upper_32_bits(gtt_offset));
- intel_ring_emit(ring, req->global_seqno);
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring,
- MI_SEMAPHORE_SIGNAL |
- MI_SEMAPHORE_TARGET(waiter->hw_id));
- intel_ring_emit(ring, 0);
+ *out++ = GFX_OP_PIPE_CONTROL(6);
+ *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_QW_WRITE |
+ PIPE_CONTROL_CS_STALL);
+ *out++ = lower_32_bits(gtt_offset);
+ *out++ = upper_32_bits(gtt_offset);
+ *out++ = req->global_seqno;
+ *out++ = 0;
+ *out++ = (MI_SEMAPHORE_SIGNAL |
+ MI_SEMAPHORE_TARGET(waiter->hw_id));
+ *out++ = 0;
}
- intel_ring_advance(ring);
- return 0;
+ return out;
}
-static int gen8_xcs_signal(struct drm_i915_gem_request *req)
+static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out)
{
- struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = req->i915;
struct intel_engine_cs *waiter;
enum intel_engine_id id;
- int ret, num_rings;
-
- num_rings = INTEL_INFO(dev_priv)->num_rings;
- ret = intel_ring_begin(req, (num_rings-1) * 6);
- if (ret)
- return ret;
for_each_engine(waiter, dev_priv, id) {
u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
continue;
- intel_ring_emit(ring,
- (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
- intel_ring_emit(ring,
- lower_32_bits(gtt_offset) |
- MI_FLUSH_DW_USE_GTT);
- intel_ring_emit(ring, upper_32_bits(gtt_offset));
- intel_ring_emit(ring, req->global_seqno);
- intel_ring_emit(ring,
- MI_SEMAPHORE_SIGNAL |
- MI_SEMAPHORE_TARGET(waiter->hw_id));
- intel_ring_emit(ring, 0);
+ *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
+ *out++ = upper_32_bits(gtt_offset);
+ *out++ = req->global_seqno;
+ *out++ = (MI_SEMAPHORE_SIGNAL |
+ MI_SEMAPHORE_TARGET(waiter->hw_id));
+ *out++ = 0;
}
- intel_ring_advance(ring);
- return 0;
+ return out;
}
-static int gen6_signal(struct drm_i915_gem_request *req)
+static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out)
{
- struct intel_ring *ring = req->ring;
struct drm_i915_private *dev_priv = req->i915;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int ret, num_rings;
-
- num_rings = INTEL_INFO(dev_priv)->num_rings;
- ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2));
- if (ret)
- return ret;
+ int num_rings = 0;
for_each_engine(engine, dev_priv, id) {
i915_reg_t mbox_reg;
mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
if (i915_mmio_reg_valid(mbox_reg)) {
- intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
- intel_ring_emit_reg(ring, mbox_reg);
- intel_ring_emit(ring, req->global_seqno);
+ *out++ = MI_LOAD_REGISTER_IMM(1);
+ *out++ = i915_mmio_reg_offset(mbox_reg);
+ *out++ = req->global_seqno;
+ num_rings++;
}
}
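+ /* Each mailbox write above is 3 dwords; pad with a NOOP when the
+ * count is odd so that the tail advances in qword steps.
+ */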
+ if (num_rings & 1)
+ *out++ = MI_NOOP;
- /* If num_dwords was rounded, make sure the tail pointer is correct */
- if (num_rings % 2 == 0)
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
-
- return 0;
+ return out;
}
static void i9xx_submit_request(struct drm_i915_gem_request *request)
{
struct drm_i915_private *dev_priv = request->i915;
- I915_WRITE_TAIL(request->engine,
- intel_ring_offset(request->ring, request->tail));
+ I915_WRITE_TAIL(request->engine, request->tail);
}
-static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req)
+static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req,
+ u32 *out)
{
- struct intel_ring *ring = req->ring;
- int ret;
-
- ret = intel_ring_begin(req, 4);
- if (ret)
- return ret;
-
- intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
- intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(ring, req->global_seqno);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_advance(ring);
-
- req->tail = ring->tail;
+ *out++ = MI_STORE_DWORD_INDEX;
+ *out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
+ *out++ = req->global_seqno;
+ *out++ = MI_USER_INTERRUPT;
- return 0;
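+ /* Record where the breadcrumb ends; i9xx_submit_request() writes
+ * this offset straight into the ring's TAIL register.
+ */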
+ req->tail = intel_ring_offset(req->ring, out);
}
+static const int i9xx_emit_breadcrumb_sz = 4;
* Update the mailbox registers in the *other* rings with the current seqno.
* This acts like a signal in the canonical semaphore.
*/
-static int gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req)
+static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req,
+ u32 *out)
{
- int ret;
-
- ret = req->engine->semaphore.signal(req);
- if (ret)
- return ret;
-
- return i9xx_emit_breadcrumb(req);
+ return i9xx_emit_breadcrumb(req,
+ req->engine->semaphore.signal(req, out));
}
-static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req)
+static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
+ u32 *out)
{
struct intel_engine_cs *engine = req->engine;
- struct intel_ring *ring = req->ring;
- int ret;
- if (engine->semaphore.signal) {
- ret = engine->semaphore.signal(req);
- if (ret)
- return ret;
- }
-
- ret = intel_ring_begin(req, 8);
- if (ret)
- return ret;
+ if (engine->semaphore.signal)
+ out = engine->semaphore.signal(req, out);
- intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
- intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB |
+ *out++ = GFX_OP_PIPE_CONTROL(6);
+ *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_QW_WRITE));
- intel_ring_emit(ring, intel_hws_seqno_address(engine));
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, req->global_seqno);
+ PIPE_CONTROL_QW_WRITE);
+ *out++ = intel_hws_seqno_address(engine);
+ *out++ = 0;
+ *out++ = req->global_seqno;
/* We're thrashing one dword of HWS. */
- intel_ring_emit(ring, 0);
- intel_ring_emit(ring, MI_USER_INTERRUPT);
- intel_ring_emit(ring, MI_NOOP);
- intel_ring_advance(ring);
-
- req->tail = ring->tail;
+ *out++ = 0;
+ *out++ = MI_USER_INTERRUPT;
+ *out++ = MI_NOOP;
- return 0;
+ req->tail = intel_ring_offset(req->ring, out);
}
+static const int gen8_render_emit_breadcrumb_sz = 8;
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2)
- int (*emit_breadcrumb)(struct drm_i915_gem_request *req);
+ void (*emit_breadcrumb)(struct drm_i915_gem_request *req,
+ u32 *out);
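+ /* emit_breadcrumb() writes exactly emit_breadcrumb_sz dwords into
+ * 'out'; that many dwords are reserved in the ring beforehand.
+ */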
+ int emit_breadcrumb_sz;
/* Pass the request to the hardware queue (e.g. directly into
/* AKA wait() */
int (*sync_to)(struct drm_i915_gem_request *req,
struct drm_i915_gem_request *signal);
- int (*signal)(struct drm_i915_gem_request *req);
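+ /* Writes the signal dwords at *out and returns the pointer
+ * advanced past them.
+ */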
+ u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out);
} semaphore;
/* Execlists */
*/
}
-static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
+static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
{
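+ /* 'addr' points into the ring's CPU mapping starting at ring->vaddr */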
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
- return value & (ring->size - 1);
+ u32 offset = addr - ring->vaddr;
+ return offset & (ring->size - 1);
}
int __intel_ring_space(int head, int tail, int size);