#define WA_TAIL_DWORDS 2
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
+struct virtual_engine {
+ struct intel_engine_cs base;
+ struct intel_context context;
+
+ /*
+ * We allow only a single request through the virtual engine at a time
+ * (each request in the timeline waits for the completion fence of
+ * the previous before being submitted). By restricting ourselves to
+ * only submitting a single request, each request is placed on to a
+ * physical to maximise load spreading (by virtue of the late greedy
+ * scheduling -- each real engine takes the next available request
+ * upon idling).
+ */
+ struct i915_request *request;
+
+ /*
+ * We keep a rbtree of available virtual engines inside each physical
+ * engine, sorted by priority. Here we preallocate the nodes we need
+ * for the virtual engine, indexed by physical_engine->id.
+ */
+ struct ve_node {
+ struct rb_node rb;
+ int prio;
+ } nodes[I915_NUM_ENGINES];
+
+ /* And finally, which physical engines this virtual engine maps onto. */
+ unsigned int num_siblings;
+ struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+ GEM_BUG_ON(!intel_engine_is_virtual(engine));
+ return container_of(engine, struct virtual_engine, base);
+}
+
static int execlists_context_deferred_alloc(struct intel_context *ce,
struct intel_engine_cs *engine);
static void execlists_init_reg_state(u32 *reg_state,
}
static inline bool need_preempt(const struct intel_engine_cs *engine,
- const struct i915_request *rq)
+ const struct i915_request *rq,
+ struct rb_node *rb)
{
int last_prio;
rq_prio(list_next_entry(rq, link)) > last_prio)
return true;
+ if (rb) {
+ struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ bool preempt = false;
+
+ if (engine == ve->siblings[0]) { /* only preempt one sibling */
+ struct i915_request *next;
+
+ rcu_read_lock();
+ next = READ_ONCE(ve->request);
+ if (next)
+ preempt = rq_prio(next) > last_prio;
+ rcu_read_unlock();
+ }
+
+ if (preempt)
+ return preempt;
+ }
+
/*
* If the inflight context did not trigger the preemption, then maybe
* it was the set of queued requests? Pick the highest priority in
list_for_each_entry_safe_reverse(rq, rn,
&engine->timeline.requests,
link) {
+ struct intel_engine_cs *owner;
+
if (i915_request_completed(rq))
break;
GEM_BUG_ON(rq->hw_context->active);
- GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
- if (rq_prio(rq) != prio) {
- prio = rq_prio(rq);
- pl = i915_sched_lookup_priolist(engine, prio);
- }
- GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-
- list_add(&rq->sched.link, pl);
+ /*
+ * Push the request back into the queue for later resubmission.
+ * If this request is not native to this physical engine (i.e.
+ * it came from a virtual source), push it back onto the virtual
+ * engine so that it can be moved across onto another physical
+ * engine as load dictates.
+ */
+ owner = rq->hw_context->engine;
+ if (likely(owner == engine)) {
+ GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+ if (rq_prio(rq) != prio) {
+ prio = rq_prio(rq);
+ pl = i915_sched_lookup_priolist(engine, prio);
+ }
+ GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
- active = rq;
+ list_add(&rq->sched.link, pl);
+ active = rq;
+ } else {
+ rq->engine = owner;
+ owner->submit_request(rq);
+ active = NULL;
+ }
}
return active;
execlists));
}
+static void virtual_update_register_offsets(u32 *regs,
+ struct intel_engine_cs *engine)
+{
+ u32 base = engine->mmio_base;
+
+ /* Must match execlists_init_reg_state()! */
+
+ regs[CTX_CONTEXT_CONTROL] =
+ i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
+ regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+ regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+ regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+ regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+ regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+ regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+ regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+ regs[CTX_SECOND_BB_HEAD_U] =
+ i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+ regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+ regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+ regs[CTX_CTX_TIMESTAMP] =
+ i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+ regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
+ regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
+ regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
+ regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
+ regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
+ regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
+ regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+ regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+
+ if (engine->class == RENDER_CLASS) {
+ regs[CTX_RCS_INDIRECT_CTX] =
+ i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+ regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+ i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+ regs[CTX_BB_PER_CTX_PTR] =
+ i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+ regs[CTX_R_PWR_CLK_STATE] =
+ i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+ }
+}
+
+static bool virtual_matches(const struct virtual_engine *ve,
+ const struct i915_request *rq,
+ const struct intel_engine_cs *engine)
+{
+ const struct intel_engine_cs *active;
+
+ /*
+ * We track when the HW has completed saving the context image
+ * (i.e. when we have seen the final CS event switching out of
+ * the context) and must not overwrite the context image before
+ * then. This restricts us to only using the active engine
+ * while the previous virtualized request is inflight (so
+ * we reuse the register offsets). This is a very small
+ * hystersis on the greedy seelction algorithm.
+ */
+ active = READ_ONCE(ve->context.active);
+ if (active && active != engine)
+ return false;
+
+ return true;
+}
+
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
+ struct intel_engine_cs *engine)
+{
+ struct intel_engine_cs *old = ve->siblings[0];
+
+ /* All unattached (rq->engine == old) must already be completed */
+
+ spin_lock(&old->breadcrumbs.irq_lock);
+ if (!list_empty(&ve->context.signal_link)) {
+ list_move_tail(&ve->context.signal_link,
+ &engine->breadcrumbs.signalers);
+ intel_engine_queue_breadcrumbs(engine);
+ }
+ spin_unlock(&old->breadcrumbs.irq_lock);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
* and context switches) submission.
*/
+ for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+ struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ struct i915_request *rq = READ_ONCE(ve->request);
+
+ if (!rq) { /* lazily cleanup after another engine handled rq */
+ rb_erase_cached(rb, &execlists->virtual);
+ RB_CLEAR_NODE(rb);
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
+
+ if (!virtual_matches(ve, rq, engine)) {
+ rb = rb_next(rb);
+ continue;
+ }
+
+ break;
+ }
+
if (last) {
/*
* Don't resubmit or switch until all outstanding
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
return;
- if (need_preempt(engine, last)) {
+ if (need_preempt(engine, last, rb)) {
inject_preempt_context(engine);
return;
}
last->tail = last->wa_tail;
}
+ while (rb) { /* XXX virtual is always taking precedence */
+ struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ struct i915_request *rq;
+
+ spin_lock(&ve->base.timeline.lock);
+
+ rq = ve->request;
+ if (unlikely(!rq)) { /* lost the race to a sibling */
+ spin_unlock(&ve->base.timeline.lock);
+ rb_erase_cached(rb, &execlists->virtual);
+ RB_CLEAR_NODE(rb);
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
+
+ GEM_BUG_ON(rq != ve->request);
+ GEM_BUG_ON(rq->engine != &ve->base);
+ GEM_BUG_ON(rq->hw_context != &ve->context);
+
+ if (rq_prio(rq) >= queue_prio(execlists)) {
+ if (!virtual_matches(ve, rq, engine)) {
+ spin_unlock(&ve->base.timeline.lock);
+ rb = rb_next(rb);
+ continue;
+ }
+
+ if (last && !can_merge_rq(last, rq)) {
+ spin_unlock(&ve->base.timeline.lock);
+ return; /* leave this rq for another engine */
+ }
+
+ GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
+ engine->name,
+ rq->fence.context,
+ rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ yesno(engine != ve->siblings[0]));
+
+ ve->request = NULL;
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ rb_erase_cached(rb, &execlists->virtual);
+ RB_CLEAR_NODE(rb);
+
+ rq->engine = engine;
+
+ if (engine != ve->siblings[0]) {
+ u32 *regs = ve->context.lrc_reg_state;
+ unsigned int n;
+
+ GEM_BUG_ON(READ_ONCE(ve->context.active));
+ virtual_update_register_offsets(regs, engine);
+
+ if (!list_empty(&ve->context.signals))
+ virtual_xfer_breadcrumbs(ve, engine);
+
+ /*
+ * Move the bound engine to the top of the list
+ * for future execution. We then kick this
+ * tasklet first before checking others, so that
+ * we preferentially reuse this set of bound
+ * registers.
+ */
+ for (n = 1; n < ve->num_siblings; n++) {
+ if (ve->siblings[n] == engine) {
+ swap(ve->siblings[n],
+ ve->siblings[0]);
+ break;
+ }
+ }
+
+ GEM_BUG_ON(ve->siblings[0] != engine);
+ }
+
+ __i915_request_submit(rq);
+ trace_i915_request_in(rq, port_index(port, execlists));
+ submit = true;
+ last = rq;
+ }
+
+ spin_unlock(&ve->base.timeline.lock);
+ break;
+ }
+
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
&execlists->csb_status[reset_value]);
}
+static struct i915_request *active_request(struct i915_request *rq)
+{
+ const struct list_head * const list = &rq->engine->timeline.requests;
+ const struct intel_context * const context = rq->hw_context;
+ struct i915_request *active = NULL;
+
+ list_for_each_entry_from_reverse(rq, list, link) {
+ if (i915_request_completed(rq))
+ break;
+
+ if (rq->hw_context != context)
+ break;
+
+ active = rq;
+ }
+
+ return active;
+}
+
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
if (!port_isset(execlists->port))
goto out_clear;
- ce = port_request(execlists->port)->hw_context;
+ rq = port_request(execlists->port);
+ ce = rq->hw_context;
/*
* Catch up with any missed context-switch interrupts.
*/
execlists_cancel_port_requests(execlists);
- /* Push back any incomplete requests for replay after the reset. */
- rq = __unwind_incomplete_requests(engine);
+ rq = active_request(rq);
if (!rq)
goto out_replay;
- if (rq->hw_context != ce) { /* caught just before a CS event */
- rq = NULL;
- goto out_replay;
- }
-
/*
* If this request hasn't started yet, e.g. it is waiting on a
* semaphore, we need to avoid skipping the request or else we
}
execlists_init_reg_state(regs, ce, engine, ce->ring);
- /* Rerun the request; its payload has been neutered (if guilty). */
out_replay:
+ /* Rerun the request; its payload has been neutered (if guilty). */
ce->ring->head =
rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
intel_ring_update_space(ce->ring);
__execlists_update_reg_state(ce, engine);
+ /* Push back any incomplete requests for replay after the reset. */
+ __unwind_incomplete_requests(engine);
+
out_clear:
execlists_clear_all_active(execlists);
}
i915_priolist_free(p);
}
+ /* Cancel all attached virtual engines */
+ while ((rb = rb_first_cached(&execlists->virtual))) {
+ struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+
+ rb_erase_cached(rb, &execlists->virtual);
+ RB_CLEAR_NODE(rb);
+
+ spin_lock(&ve->base.timeline.lock);
+ if (ve->request) {
+ ve->request->engine = engine;
+ __i915_request_submit(ve->request);
+ dma_fence_set_error(&ve->request->fence, -EIO);
+ i915_request_mark_complete(ve->request);
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ ve->request = NULL;
+ }
+ spin_unlock(&ve->base.timeline.lock);
+ }
+
/* Remaining _unready_ requests will be nop'ed when submitted */
execlists->queue_priority_hint = INT_MIN;
bool rcs = engine->class == RENDER_CLASS;
u32 base = engine->mmio_base;
- /* A context is actually a big batch buffer with several
+ /*
+ * A context is actually a big batch buffer with several
* MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
* values we are setting here are only for the first context restore:
* on a subsequent save, the GPU will recreate this batchbuffer with new
* values (including all the missing MI_LOAD_REGISTER_IMM commands that
* we are not initializing here).
+ *
+ * Must keep consistent with virtual_update_register_offsets().
*/
regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
MI_LRI_FORCE_POSTED;
return ret;
}
+static void virtual_context_destroy(struct kref *kref)
+{
+ struct virtual_engine *ve =
+ container_of(kref, typeof(*ve), context.ref);
+ unsigned int n;
+
+ GEM_BUG_ON(ve->request);
+ GEM_BUG_ON(ve->context.active);
+
+ for (n = 0; n < ve->num_siblings; n++) {
+ struct intel_engine_cs *sibling = ve->siblings[n];
+ struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+ if (RB_EMPTY_NODE(node))
+ continue;
+
+ spin_lock_irq(&sibling->timeline.lock);
+
+ /* Detachment is lazily performed in the execlists tasklet */
+ if (!RB_EMPTY_NODE(node))
+ rb_erase_cached(node, &sibling->execlists.virtual);
+
+ spin_unlock_irq(&sibling->timeline.lock);
+ }
+ GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+ if (ve->context.state)
+ __execlists_context_fini(&ve->context);
+
+ i915_timeline_fini(&ve->base.timeline);
+ kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct virtual_engine *ve)
+{
+ int swp;
+
+ /*
+ * Pick a random sibling on starting to help spread the load around.
+ *
+ * New contexts are typically created with exactly the same order
+ * of siblings, and often started in batches. Due to the way we iterate
+ * the array of sibling when submitting requests, sibling[0] is
+ * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+ * randomised across the system, we also help spread the load by the
+ * first engine we inspect being different each time.
+ *
+ * NB This does not force us to execute on this engine, it will just
+ * typically be the first we inspect for submission.
+ */
+ swp = prandom_u32_max(ve->num_siblings);
+ if (!swp)
+ return;
+
+ swap(ve->siblings[swp], ve->siblings[0]);
+ virtual_update_register_offsets(ve->context.lrc_reg_state,
+ ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+ int err;
+
+ /* Note: we must use a real engine class for setting up reg state */
+ err = __execlists_context_pin(ce, ve->siblings[0]);
+ if (err)
+ return err;
+
+ virtual_engine_initial_hint(ve);
+ return 0;
+}
+
+static void virtual_context_enter(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+ unsigned int n;
+
+ for (n = 0; n < ve->num_siblings; n++)
+ intel_engine_pm_get(ve->siblings[n]);
+}
+
+static void virtual_context_exit(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+ unsigned int n;
+
+ ce->saturated = 0;
+ for (n = 0; n < ve->num_siblings; n++)
+ intel_engine_pm_put(ve->siblings[n]);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+ .pin = virtual_context_pin,
+ .unpin = execlists_context_unpin,
+
+ .enter = virtual_context_enter,
+ .exit = virtual_context_exit,
+
+ .destroy = virtual_context_destroy,
+};
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+ struct virtual_engine * const ve = (struct virtual_engine *)data;
+ const int prio = ve->base.execlists.queue_priority_hint;
+ unsigned int n;
+
+ local_irq_disable();
+ for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
+ struct intel_engine_cs *sibling = ve->siblings[n];
+ struct ve_node * const node = &ve->nodes[sibling->id];
+ struct rb_node **parent, *rb;
+ bool first;
+
+ spin_lock(&sibling->timeline.lock);
+
+ if (!RB_EMPTY_NODE(&node->rb)) {
+ /*
+ * Cheat and avoid rebalancing the tree if we can
+ * reuse this node in situ.
+ */
+ first = rb_first_cached(&sibling->execlists.virtual) ==
+ &node->rb;
+ if (prio == node->prio || (prio > node->prio && first))
+ goto submit_engine;
+
+ rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+ }
+
+ rb = NULL;
+ first = true;
+ parent = &sibling->execlists.virtual.rb_root.rb_node;
+ while (*parent) {
+ struct ve_node *other;
+
+ rb = *parent;
+ other = rb_entry(rb, typeof(*other), rb);
+ if (prio > other->prio) {
+ parent = &rb->rb_left;
+ } else {
+ parent = &rb->rb_right;
+ first = false;
+ }
+ }
+
+ rb_link_node(&node->rb, rb, parent);
+ rb_insert_color_cached(&node->rb,
+ &sibling->execlists.virtual,
+ first);
+
+submit_engine:
+ GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+ node->prio = prio;
+ if (first && prio > sibling->execlists.queue_priority_hint) {
+ sibling->execlists.queue_priority_hint = prio;
+ tasklet_hi_schedule(&sibling->execlists.tasklet);
+ }
+
+ spin_unlock(&sibling->timeline.lock);
+ }
+ local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *rq)
+{
+ struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+ GEM_TRACE("%s: rq=%llx:%lld\n",
+ ve->base.name,
+ rq->fence.context,
+ rq->fence.seqno);
+
+ GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+ GEM_BUG_ON(ve->request);
+ ve->base.execlists.queue_priority_hint = rq_prio(rq);
+ WRITE_ONCE(ve->request, rq);
+
+ tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+ struct intel_engine_cs **siblings,
+ unsigned int count)
+{
+ struct virtual_engine *ve;
+ unsigned int n;
+ int err;
+
+ if (count == 0)
+ return ERR_PTR(-EINVAL);
+
+ if (count == 1)
+ return intel_context_create(ctx, siblings[0]);
+
+ ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+ if (!ve)
+ return ERR_PTR(-ENOMEM);
+
+ ve->base.i915 = ctx->i915;
+ ve->base.id = -1;
+ ve->base.class = OTHER_CLASS;
+ ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+ ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+ ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+ snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+ err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
+ if (err)
+ goto err_put;
+ i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+
+ intel_engine_init_execlists(&ve->base);
+
+ ve->base.cops = &virtual_context_ops;
+ ve->base.request_alloc = execlists_request_alloc;
+
+ ve->base.schedule = i915_schedule;
+ ve->base.submit_request = virtual_submit_request;
+
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ tasklet_init(&ve->base.execlists.tasklet,
+ virtual_submission_tasklet,
+ (unsigned long)ve);
+
+ intel_context_init(&ve->context, ctx, &ve->base);
+
+ for (n = 0; n < count; n++) {
+ struct intel_engine_cs *sibling = siblings[n];
+
+ GEM_BUG_ON(!is_power_of_2(sibling->mask));
+ if (sibling->mask & ve->base.mask) {
+ DRM_DEBUG("duplicate %s entry in load balancer\n",
+ sibling->name);
+ err = -EINVAL;
+ goto err_put;
+ }
+
+ /*
+ * The virtual engine implementation is tightly coupled to
+ * the execlists backend -- we push out request directly
+ * into a tree inside each physical engine. We could support
+ * layering if we handle cloning of the requests and
+ * submitting a copy into each backend.
+ */
+ if (sibling->execlists.tasklet.func !=
+ execlists_submission_tasklet) {
+ err = -ENODEV;
+ goto err_put;
+ }
+
+ GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+ RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+ ve->siblings[ve->num_siblings++] = sibling;
+ ve->base.mask |= sibling->mask;
+
+ /*
+ * All physical engines must be compatible for their emission
+ * functions (as we build the instructions during request
+ * construction and do not alter them before submission
+ * on the physical engine). We use the engine class as a guide
+ * here, although that could be refined.
+ */
+ if (ve->base.class != OTHER_CLASS) {
+ if (ve->base.class != sibling->class) {
+ DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
+ sibling->class, ve->base.class);
+ err = -EINVAL;
+ goto err_put;
+ }
+ continue;
+ }
+
+ ve->base.class = sibling->class;
+ ve->base.uabi_class = sibling->uabi_class;
+ snprintf(ve->base.name, sizeof(ve->base.name),
+ "v%dx%d", ve->base.class, count);
+ ve->base.context_size = sibling->context_size;
+
+ ve->base.emit_bb_start = sibling->emit_bb_start;
+ ve->base.emit_flush = sibling->emit_flush;
+ ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+ ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+ ve->base.emit_fini_breadcrumb_dw =
+ sibling->emit_fini_breadcrumb_dw;
+ }
+
+ return &ve->context;
+
+err_put:
+ intel_context_put(&ve->context);
+ return ERR_PTR(err);
+}
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+ struct intel_engine_cs *src)
+{
+ struct virtual_engine *se = to_virtual_engine(src);
+ struct intel_context *dst;
+
+ dst = intel_execlists_create_virtual(ctx,
+ se->siblings,
+ se->num_siblings);
+ if (IS_ERR(dst))
+ return dst;
+
+ return dst;
+}
+
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
show_request(m, last, "\t\tQ ");
}
+ last = NULL;
+ count = 0;
+ for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+ struct virtual_engine *ve =
+ rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+ struct i915_request *rq = READ_ONCE(ve->request);
+
+ if (rq) {
+ if (count++ < max - 1)
+ show_request(m, rq, "\t\tV ");
+ else
+ last = rq;
+ }
+ }
+ if (last) {
+ if (count > max) {
+ drm_printf(m,
+ "\t\t...skipping %d virtual requests...\n",
+ count - max);
+ }
+ show_request(m, last, "\t\tV ");
+ }
+
spin_unlock_irqrestore(&engine->timeline.lock, flags);
}
return err;
}
+static int nop_virtual_engine(struct drm_i915_private *i915,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling,
+ unsigned int nctx,
+ unsigned int flags)
+#define CHAIN BIT(0)
+{
+ IGT_TIMEOUT(end_time);
+ struct i915_request *request[16];
+ struct i915_gem_context *ctx[16];
+ struct intel_context *ve[16];
+ unsigned long n, prime, nc;
+ struct igt_live_test t;
+ ktime_t times[2] = {};
+ int err;
+
+ GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+ for (n = 0; n < nctx; n++) {
+ ctx[n] = kernel_context(i915);
+ if (!ctx[n]) {
+ err = -ENOMEM;
+ nctx = n;
+ goto out;
+ }
+
+ ve[n] = intel_execlists_create_virtual(ctx[n],
+ siblings, nsibling);
+ if (IS_ERR(ve[n])) {
+ kernel_context_close(ctx[n]);
+ err = PTR_ERR(ve[n]);
+ nctx = n;
+ goto out;
+ }
+
+ err = intel_context_pin(ve[n]);
+ if (err) {
+ intel_context_put(ve[n]);
+ kernel_context_close(ctx[n]);
+ nctx = n;
+ goto out;
+ }
+ }
+
+ err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+ if (err)
+ goto out;
+
+ for_each_prime_number_from(prime, 1, 8192) {
+ times[1] = ktime_get_raw();
+
+ if (flags & CHAIN) {
+ for (nc = 0; nc < nctx; nc++) {
+ for (n = 0; n < prime; n++) {
+ request[nc] =
+ i915_request_create(ve[nc]);
+ if (IS_ERR(request[nc])) {
+ err = PTR_ERR(request[nc]);
+ goto out;
+ }
+
+ i915_request_add(request[nc]);
+ }
+ }
+ } else {
+ for (n = 0; n < prime; n++) {
+ for (nc = 0; nc < nctx; nc++) {
+ request[nc] =
+ i915_request_create(ve[nc]);
+ if (IS_ERR(request[nc])) {
+ err = PTR_ERR(request[nc]);
+ goto out;
+ }
+
+ i915_request_add(request[nc]);
+ }
+ }
+ }
+
+ for (nc = 0; nc < nctx; nc++) {
+ if (i915_request_wait(request[nc],
+ I915_WAIT_LOCKED,
+ HZ / 10) < 0) {
+ pr_err("%s(%s): wait for %llx:%lld timed out\n",
+ __func__, ve[0]->engine->name,
+ request[nc]->fence.context,
+ request[nc]->fence.seqno);
+
+ GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+ __func__, ve[0]->engine->name,
+ request[nc]->fence.context,
+ request[nc]->fence.seqno);
+ GEM_TRACE_DUMP();
+ i915_gem_set_wedged(i915);
+ break;
+ }
+ }
+
+ times[1] = ktime_sub(ktime_get_raw(), times[1]);
+ if (prime == 1)
+ times[0] = times[1];
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+ }
+
+ err = igt_live_test_end(&t);
+ if (err)
+ goto out;
+
+ pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+ nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
+ prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ for (nc = 0; nc < nctx; nc++) {
+ intel_context_unpin(ve[nc]);
+ intel_context_put(ve[nc]);
+ kernel_context_close(ctx[nc]);
+ }
+ return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned int class, inst;
+ int err = -ENODEV;
+
+ if (USES_GUC_SUBMISSION(i915))
+ return 0;
+
+ mutex_lock(&i915->drm.struct_mutex);
+
+ for_each_engine(engine, i915, id) {
+ err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+ if (err) {
+ pr_err("Failed to wrap engine %s: err=%d\n",
+ engine->name, err);
+ goto out_unlock;
+ }
+ }
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ int nsibling, n;
+
+ nsibling = 0;
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!i915->engine_class[class][inst])
+ continue;
+
+ siblings[nsibling++] = i915->engine_class[class][inst];
+ }
+ if (nsibling < 2)
+ continue;
+
+ for (n = 1; n <= nsibling + 1; n++) {
+ err = nop_virtual_engine(i915, siblings, nsibling,
+ n, 0);
+ if (err)
+ goto out_unlock;
+ }
+
+ err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+ if (err)
+ goto out_unlock;
+ }
+
+out_unlock:
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_chain_preempt),
SUBTEST(live_preempt_hang),
SUBTEST(live_preempt_smoke),
+ SUBTEST(live_virtual_engine),
};
if (!HAS_EXECLISTS(i915))