drm/i915: Load balancing across a virtual engine

author Chris Wilson <chris@chris-wilson.co.uk>

Tue, 21 May 2019 21:11:30 +0000 (22:11 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Wed, 22 May 2019 07:40:38 +0000 (08:40 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 21 May 2019 21:11:30 +0000 (22:11 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 22 May 2019 07:40:38 +0000 (08:40 +0100)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h

index e381c1c7390288fb786a055c6d895f785142cc8d..7b47e00fa082904f0fbc81f92ca974d772df3852 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -227,6 +227,7 @@ struct intel_engine_execlists {
          * @queue: queue of requests, in priority lists
          */
         struct rb_root_cached queue;
+       struct rb_root_cached virtual;
  
         /**
          * @csb_write: control register for Context Switch buffer
@@ -445,6 +446,7 @@ struct intel_engine_cs {
  #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
  #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
  #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
         unsigned int flags;
  
         /*
@@ -534,6 +536,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
         return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
  }
  
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+       return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
  #define instdone_slice_mask(dev_priv__) \
         (IS_GEN(dev_priv__, 7) ? \
          1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c

index f263a8374273421733027531468ef9649e920462..affa5e2dfce1d8597c33462d703134b627f0bd82 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -164,6 +164,42 @@
  #define WA_TAIL_DWORDS 2
  #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
  
+struct virtual_engine {
+       struct intel_engine_cs base;
+       struct intel_context context;
+
+       /*
+        * We allow only a single request through the virtual engine at a time
+        * (each request in the timeline waits for the completion fence of
+        * the previous before being submitted). By restricting ourselves to
+        * only submitting a single request, each request is placed on to a
+        * physical to maximise load spreading (by virtue of the late greedy
+        * scheduling -- each real engine takes the next available request
+        * upon idling).
+        */
+       struct i915_request *request;
+
+       /*
+        * We keep a rbtree of available virtual engines inside each physical
+        * engine, sorted by priority. Here we preallocate the nodes we need
+        * for the virtual engine, indexed by physical_engine->id.
+        */
+       struct ve_node {
+               struct rb_node rb;
+               int prio;
+       } nodes[I915_NUM_ENGINES];
+
+       /* And finally, which physical engines this virtual engine maps onto. */
+       unsigned int num_siblings;
+       struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+       GEM_BUG_ON(!intel_engine_is_virtual(engine));
+       return container_of(engine, struct virtual_engine, base);
+}
+
  static int execlists_context_deferred_alloc(struct intel_context *ce,
                                             struct intel_engine_cs *engine);
  static void execlists_init_reg_state(u32 *reg_state,
@@ -216,7 +252,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
  }
  
  static inline bool need_preempt(const struct intel_engine_cs *engine,
-                               const struct i915_request *rq)
+                               const struct i915_request *rq,
+                               struct rb_node *rb)
  {
         int last_prio;
  
@@ -251,6 +288,25 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
             rq_prio(list_next_entry(rq, link)) > last_prio)
                 return true;
  
+       if (rb) {
+               struct virtual_engine *ve =
+                       rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+               bool preempt = false;
+
+               if (engine == ve->siblings[0]) { /* only preempt one sibling */
+                       struct i915_request *next;
+
+                       rcu_read_lock();
+                       next = READ_ONCE(ve->request);
+                       if (next)
+                               preempt = rq_prio(next) > last_prio;
+                       rcu_read_unlock();
+               }
+
+               if (preempt)
+                       return preempt;
+       }
+
         /*
          * If the inflight context did not trigger the preemption, then maybe
          * it was the set of queued requests? Pick the highest priority in
@@ -369,6 +425,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
         list_for_each_entry_safe_reverse(rq, rn,
                                          &engine->timeline.requests,
                                          link) {
+               struct intel_engine_cs *owner;
+
                 if (i915_request_completed(rq))
                         break;
  
@@ -377,16 +435,29 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
  
                 GEM_BUG_ON(rq->hw_context->active);
  
-               GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-               if (rq_prio(rq) != prio) {
-                       prio = rq_prio(rq);
-                       pl = i915_sched_lookup_priolist(engine, prio);
-               }
-               GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-
-               list_add(&rq->sched.link, pl);
+               /*
+                * Push the request back into the queue for later resubmission.
+                * If this request is not native to this physical engine (i.e.
+                * it came from a virtual source), push it back onto the virtual
+                * engine so that it can be moved across onto another physical
+                * engine as load dictates.
+                */
+               owner = rq->hw_context->engine;
+               if (likely(owner == engine)) {
+                       GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+                       if (rq_prio(rq) != prio) {
+                               prio = rq_prio(rq);
+                               pl = i915_sched_lookup_priolist(engine, prio);
+                       }
+                       GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
  
-               active = rq;
+                       list_add(&rq->sched.link, pl);
+                       active = rq;
+               } else {
+                       rq->engine = owner;
+                       owner->submit_request(rq);
+                       active = NULL;
+               }
         }
  
         return active;
@@ -622,6 +693,90 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
                                                   execlists));
  }
  
+static void virtual_update_register_offsets(u32 *regs,
+                                           struct intel_engine_cs *engine)
+{
+       u32 base = engine->mmio_base;
+
+       /* Must match execlists_init_reg_state()! */
+
+       regs[CTX_CONTEXT_CONTROL] =
+               i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
+       regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+       regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+       regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+       regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+       regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+       regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+       regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+       regs[CTX_SECOND_BB_HEAD_U] =
+               i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+       regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+       regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+       regs[CTX_CTX_TIMESTAMP] =
+               i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+       regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
+       regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
+       regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
+       regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
+       regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
+       regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
+       regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+       regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+
+       if (engine->class == RENDER_CLASS) {
+               regs[CTX_RCS_INDIRECT_CTX] =
+                       i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+               regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+                       i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+               regs[CTX_BB_PER_CTX_PTR] =
+                       i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+               regs[CTX_R_PWR_CLK_STATE] =
+                       i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+       }
+}
+
+static bool virtual_matches(const struct virtual_engine *ve,
+                           const struct i915_request *rq,
+                           const struct intel_engine_cs *engine)
+{
+       const struct intel_engine_cs *active;
+
+       /*
+        * We track when the HW has completed saving the context image
+        * (i.e. when we have seen the final CS event switching out of
+        * the context) and must not overwrite the context image before
+        * then. This restricts us to only using the active engine
+        * while the previous virtualized request is inflight (so
+        * we reuse the register offsets). This is a very small
+        * hystersis on the greedy seelction algorithm.
+        */
+       active = READ_ONCE(ve->context.active);
+       if (active && active != engine)
+               return false;
+
+       return true;
+}
+
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
+                                    struct intel_engine_cs *engine)
+{
+       struct intel_engine_cs *old = ve->siblings[0];
+
+       /* All unattached (rq->engine == old) must already be completed */
+
+       spin_lock(&old->breadcrumbs.irq_lock);
+       if (!list_empty(&ve->context.signal_link)) {
+               list_move_tail(&ve->context.signal_link,
+                              &engine->breadcrumbs.signalers);
+               intel_engine_queue_breadcrumbs(engine);
+       }
+       spin_unlock(&old->breadcrumbs.irq_lock);
+}
+
  static void execlists_dequeue(struct intel_engine_cs *engine)
  {
         struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -654,6 +809,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
          * and context switches) submission.
          */
  
+       for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+               struct virtual_engine *ve =
+                       rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+               struct i915_request *rq = READ_ONCE(ve->request);
+
+               if (!rq) { /* lazily cleanup after another engine handled rq */
+                       rb_erase_cached(rb, &execlists->virtual);
+                       RB_CLEAR_NODE(rb);
+                       rb = rb_first_cached(&execlists->virtual);
+                       continue;
+               }
+
+               if (!virtual_matches(ve, rq, engine)) {
+                       rb = rb_next(rb);
+                       continue;
+               }
+
+               break;
+       }
+
         if (last) {
                 /*
                  * Don't resubmit or switch until all outstanding
@@ -675,7 +850,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                 if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
                         return;
  
-               if (need_preempt(engine, last)) {
+               if (need_preempt(engine, last, rb)) {
                         inject_preempt_context(engine);
                         return;
                 }
@@ -715,6 +890,92 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                 last->tail = last->wa_tail;
         }
  
+       while (rb) { /* XXX virtual is always taking precedence */
+               struct virtual_engine *ve =
+                       rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+               struct i915_request *rq;
+
+               spin_lock(&ve->base.timeline.lock);
+
+               rq = ve->request;
+               if (unlikely(!rq)) { /* lost the race to a sibling */
+                       spin_unlock(&ve->base.timeline.lock);
+                       rb_erase_cached(rb, &execlists->virtual);
+                       RB_CLEAR_NODE(rb);
+                       rb = rb_first_cached(&execlists->virtual);
+                       continue;
+               }
+
+               GEM_BUG_ON(rq != ve->request);
+               GEM_BUG_ON(rq->engine != &ve->base);
+               GEM_BUG_ON(rq->hw_context != &ve->context);
+
+               if (rq_prio(rq) >= queue_prio(execlists)) {
+                       if (!virtual_matches(ve, rq, engine)) {
+                               spin_unlock(&ve->base.timeline.lock);
+                               rb = rb_next(rb);
+                               continue;
+                       }
+
+                       if (last && !can_merge_rq(last, rq)) {
+                               spin_unlock(&ve->base.timeline.lock);
+                               return; /* leave this rq for another engine */
+                       }
+
+                       GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
+                                 engine->name,
+                                 rq->fence.context,
+                                 rq->fence.seqno,
+                                 i915_request_completed(rq) ? "!" :
+                                 i915_request_started(rq) ? "*" :
+                                 "",
+                                 yesno(engine != ve->siblings[0]));
+
+                       ve->request = NULL;
+                       ve->base.execlists.queue_priority_hint = INT_MIN;
+                       rb_erase_cached(rb, &execlists->virtual);
+                       RB_CLEAR_NODE(rb);
+
+                       rq->engine = engine;
+
+                       if (engine != ve->siblings[0]) {
+                               u32 *regs = ve->context.lrc_reg_state;
+                               unsigned int n;
+
+                               GEM_BUG_ON(READ_ONCE(ve->context.active));
+                               virtual_update_register_offsets(regs, engine);
+
+                               if (!list_empty(&ve->context.signals))
+                                       virtual_xfer_breadcrumbs(ve, engine);
+
+                               /*
+                                * Move the bound engine to the top of the list
+                                * for future execution. We then kick this
+                                * tasklet first before checking others, so that
+                                * we preferentially reuse this set of bound
+                                * registers.
+                                */
+                               for (n = 1; n < ve->num_siblings; n++) {
+                                       if (ve->siblings[n] == engine) {
+                                               swap(ve->siblings[n],
+                                                    ve->siblings[0]);
+                                               break;
+                                       }
+                               }
+
+                               GEM_BUG_ON(ve->siblings[0] != engine);
+                       }
+
+                       __i915_request_submit(rq);
+                       trace_i915_request_in(rq, port_index(port, execlists));
+                       submit = true;
+                       last = rq;
+               }
+
+               spin_unlock(&ve->base.timeline.lock);
+               break;
+       }
+
         while ((rb = rb_first_cached(&execlists->queue))) {
                 struct i915_priolist *p = to_priolist(rb);
                 struct i915_request *rq, *rn;
@@ -1838,6 +2099,25 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
                                &execlists->csb_status[reset_value]);
  }
  
+static struct i915_request *active_request(struct i915_request *rq)
+{
+       const struct list_head * const list = &rq->engine->timeline.requests;
+       const struct intel_context * const context = rq->hw_context;
+       struct i915_request *active = NULL;
+
+       list_for_each_entry_from_reverse(rq, list, link) {
+               if (i915_request_completed(rq))
+                       break;
+
+               if (rq->hw_context != context)
+                       break;
+
+               active = rq;
+       }
+
+       return active;
+}
+
  static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
  {
         struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1858,7 +2138,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
         if (!port_isset(execlists->port))
                 goto out_clear;
  
-       ce = port_request(execlists->port)->hw_context;
+       rq = port_request(execlists->port);
+       ce = rq->hw_context;
  
         /*
          * Catch up with any missed context-switch interrupts.
@@ -1871,16 +2152,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
          */
         execlists_cancel_port_requests(execlists);
  
-       /* Push back any incomplete requests for replay after the reset. */
-       rq = __unwind_incomplete_requests(engine);
+       rq = active_request(rq);
         if (!rq)
                 goto out_replay;
  
-       if (rq->hw_context != ce) { /* caught just before a CS event */
-               rq = NULL;
-               goto out_replay;
-       }
-
         /*
          * If this request hasn't started yet, e.g. it is waiting on a
          * semaphore, we need to avoid skipping the request or else we
@@ -1927,13 +2202,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
         }
         execlists_init_reg_state(regs, ce, engine, ce->ring);
  
-       /* Rerun the request; its payload has been neutered (if guilty). */
  out_replay:
+       /* Rerun the request; its payload has been neutered (if guilty). */
         ce->ring->head =
                 rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
         intel_ring_update_space(ce->ring);
         __execlists_update_reg_state(ce, engine);
  
+       /* Push back any incomplete requests for replay after the reset. */
+       __unwind_incomplete_requests(engine);
+
  out_clear:
         execlists_clear_all_active(execlists);
  }
@@ -2007,6 +2285,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
                 i915_priolist_free(p);
         }
  
+       /* Cancel all attached virtual engines */
+       while ((rb = rb_first_cached(&execlists->virtual))) {
+               struct virtual_engine *ve =
+                       rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+
+               rb_erase_cached(rb, &execlists->virtual);
+               RB_CLEAR_NODE(rb);
+
+               spin_lock(&ve->base.timeline.lock);
+               if (ve->request) {
+                       ve->request->engine = engine;
+                       __i915_request_submit(ve->request);
+                       dma_fence_set_error(&ve->request->fence, -EIO);
+                       i915_request_mark_complete(ve->request);
+                       ve->base.execlists.queue_priority_hint = INT_MIN;
+                       ve->request = NULL;
+               }
+               spin_unlock(&ve->base.timeline.lock);
+       }
+
         /* Remaining _unready_ requests will be nop'ed when submitted */
  
         execlists->queue_priority_hint = INT_MIN;
@@ -2491,12 +2789,15 @@ static void execlists_init_reg_state(u32 *regs,
         bool rcs = engine->class == RENDER_CLASS;
         u32 base = engine->mmio_base;
  
-       /* A context is actually a big batch buffer with several
+       /*
+        * A context is actually a big batch buffer with several
          * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
          * values we are setting here are only for the first context restore:
          * on a subsequent save, the GPU will recreate this batchbuffer with new
          * values (including all the missing MI_LOAD_REGISTER_IMM commands that
          * we are not initializing here).
+        *
+        * Must keep consistent with virtual_update_register_offsets().
          */
         regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
                                  MI_LRI_FORCE_POSTED;
@@ -2715,6 +3016,320 @@ error_deref_obj:
         return ret;
  }
  
+static void virtual_context_destroy(struct kref *kref)
+{
+       struct virtual_engine *ve =
+               container_of(kref, typeof(*ve), context.ref);
+       unsigned int n;
+
+       GEM_BUG_ON(ve->request);
+       GEM_BUG_ON(ve->context.active);
+
+       for (n = 0; n < ve->num_siblings; n++) {
+               struct intel_engine_cs *sibling = ve->siblings[n];
+               struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+               if (RB_EMPTY_NODE(node))
+                       continue;
+
+               spin_lock_irq(&sibling->timeline.lock);
+
+               /* Detachment is lazily performed in the execlists tasklet */
+               if (!RB_EMPTY_NODE(node))
+                       rb_erase_cached(node, &sibling->execlists.virtual);
+
+               spin_unlock_irq(&sibling->timeline.lock);
+       }
+       GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+       if (ve->context.state)
+               __execlists_context_fini(&ve->context);
+
+       i915_timeline_fini(&ve->base.timeline);
+       kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct virtual_engine *ve)
+{
+       int swp;
+
+       /*
+        * Pick a random sibling on starting to help spread the load around.
+        *
+        * New contexts are typically created with exactly the same order
+        * of siblings, and often started in batches. Due to the way we iterate
+        * the array of sibling when submitting requests, sibling[0] is
+        * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+        * randomised across the system, we also help spread the load by the
+        * first engine we inspect being different each time.
+        *
+        * NB This does not force us to execute on this engine, it will just
+        * typically be the first we inspect for submission.
+        */
+       swp = prandom_u32_max(ve->num_siblings);
+       if (!swp)
+               return;
+
+       swap(ve->siblings[swp], ve->siblings[0]);
+       virtual_update_register_offsets(ve->context.lrc_reg_state,
+                                       ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+       struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+       int err;
+
+       /* Note: we must use a real engine class for setting up reg state */
+       err = __execlists_context_pin(ce, ve->siblings[0]);
+       if (err)
+               return err;
+
+       virtual_engine_initial_hint(ve);
+       return 0;
+}
+
+static void virtual_context_enter(struct intel_context *ce)
+{
+       struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+       unsigned int n;
+
+       for (n = 0; n < ve->num_siblings; n++)
+               intel_engine_pm_get(ve->siblings[n]);
+}
+
+static void virtual_context_exit(struct intel_context *ce)
+{
+       struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+       unsigned int n;
+
+       ce->saturated = 0;
+       for (n = 0; n < ve->num_siblings; n++)
+               intel_engine_pm_put(ve->siblings[n]);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+       .pin = virtual_context_pin,
+       .unpin = execlists_context_unpin,
+
+       .enter = virtual_context_enter,
+       .exit = virtual_context_exit,
+
+       .destroy = virtual_context_destroy,
+};
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+       struct virtual_engine * const ve = (struct virtual_engine *)data;
+       const int prio = ve->base.execlists.queue_priority_hint;
+       unsigned int n;
+
+       local_irq_disable();
+       for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
+               struct intel_engine_cs *sibling = ve->siblings[n];
+               struct ve_node * const node = &ve->nodes[sibling->id];
+               struct rb_node **parent, *rb;
+               bool first;
+
+               spin_lock(&sibling->timeline.lock);
+
+               if (!RB_EMPTY_NODE(&node->rb)) {
+                       /*
+                        * Cheat and avoid rebalancing the tree if we can
+                        * reuse this node in situ.
+                        */
+                       first = rb_first_cached(&sibling->execlists.virtual) ==
+                               &node->rb;
+                       if (prio == node->prio || (prio > node->prio && first))
+                               goto submit_engine;
+
+                       rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+               }
+
+               rb = NULL;
+               first = true;
+               parent = &sibling->execlists.virtual.rb_root.rb_node;
+               while (*parent) {
+                       struct ve_node *other;
+
+                       rb = *parent;
+                       other = rb_entry(rb, typeof(*other), rb);
+                       if (prio > other->prio) {
+                               parent = &rb->rb_left;
+                       } else {
+                               parent = &rb->rb_right;
+                               first = false;
+                       }
+               }
+
+               rb_link_node(&node->rb, rb, parent);
+               rb_insert_color_cached(&node->rb,
+                                      &sibling->execlists.virtual,
+                                      first);
+
+submit_engine:
+               GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+               node->prio = prio;
+               if (first && prio > sibling->execlists.queue_priority_hint) {
+                       sibling->execlists.queue_priority_hint = prio;
+                       tasklet_hi_schedule(&sibling->execlists.tasklet);
+               }
+
+               spin_unlock(&sibling->timeline.lock);
+       }
+       local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *rq)
+{
+       struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+       GEM_TRACE("%s: rq=%llx:%lld\n",
+                 ve->base.name,
+                 rq->fence.context,
+                 rq->fence.seqno);
+
+       GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+       GEM_BUG_ON(ve->request);
+       ve->base.execlists.queue_priority_hint = rq_prio(rq);
+       WRITE_ONCE(ve->request, rq);
+
+       tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+                              struct intel_engine_cs **siblings,
+                              unsigned int count)
+{
+       struct virtual_engine *ve;
+       unsigned int n;
+       int err;
+
+       if (count == 0)
+               return ERR_PTR(-EINVAL);
+
+       if (count == 1)
+               return intel_context_create(ctx, siblings[0]);
+
+       ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+       if (!ve)
+               return ERR_PTR(-ENOMEM);
+
+       ve->base.i915 = ctx->i915;
+       ve->base.id = -1;
+       ve->base.class = OTHER_CLASS;
+       ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+       ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+       ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+       snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+       err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
+       if (err)
+               goto err_put;
+       i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+
+       intel_engine_init_execlists(&ve->base);
+
+       ve->base.cops = &virtual_context_ops;
+       ve->base.request_alloc = execlists_request_alloc;
+
+       ve->base.schedule = i915_schedule;
+       ve->base.submit_request = virtual_submit_request;
+
+       ve->base.execlists.queue_priority_hint = INT_MIN;
+       tasklet_init(&ve->base.execlists.tasklet,
+                    virtual_submission_tasklet,
+                    (unsigned long)ve);
+
+       intel_context_init(&ve->context, ctx, &ve->base);
+
+       for (n = 0; n < count; n++) {
+               struct intel_engine_cs *sibling = siblings[n];
+
+               GEM_BUG_ON(!is_power_of_2(sibling->mask));
+               if (sibling->mask & ve->base.mask) {
+                       DRM_DEBUG("duplicate %s entry in load balancer\n",
+                                 sibling->name);
+                       err = -EINVAL;
+                       goto err_put;
+               }
+
+               /*
+                * The virtual engine implementation is tightly coupled to
+                * the execlists backend -- we push out request directly
+                * into a tree inside each physical engine. We could support
+                * layering if we handle cloning of the requests and
+                * submitting a copy into each backend.
+                */
+               if (sibling->execlists.tasklet.func !=
+                   execlists_submission_tasklet) {
+                       err = -ENODEV;
+                       goto err_put;
+               }
+
+               GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+               RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+               ve->siblings[ve->num_siblings++] = sibling;
+               ve->base.mask |= sibling->mask;
+
+               /*
+                * All physical engines must be compatible for their emission
+                * functions (as we build the instructions during request
+                * construction and do not alter them before submission
+                * on the physical engine). We use the engine class as a guide
+                * here, although that could be refined.
+                */
+               if (ve->base.class != OTHER_CLASS) {
+                       if (ve->base.class != sibling->class) {
+                               DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
+                                         sibling->class, ve->base.class);
+                               err = -EINVAL;
+                               goto err_put;
+                       }
+                       continue;
+               }
+
+               ve->base.class = sibling->class;
+               ve->base.uabi_class = sibling->uabi_class;
+               snprintf(ve->base.name, sizeof(ve->base.name),
+                        "v%dx%d", ve->base.class, count);
+               ve->base.context_size = sibling->context_size;
+
+               ve->base.emit_bb_start = sibling->emit_bb_start;
+               ve->base.emit_flush = sibling->emit_flush;
+               ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+               ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+               ve->base.emit_fini_breadcrumb_dw =
+                       sibling->emit_fini_breadcrumb_dw;
+       }
+
+       return &ve->context;
+
+err_put:
+       intel_context_put(&ve->context);
+       return ERR_PTR(err);
+}
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+                             struct intel_engine_cs *src)
+{
+       struct virtual_engine *se = to_virtual_engine(src);
+       struct intel_context *dst;
+
+       dst = intel_execlists_create_virtual(ctx,
+                                            se->siblings,
+                                            se->num_siblings);
+       if (IS_ERR(dst))
+               return dst;
+
+       return dst;
+}
+
  void intel_execlists_show_requests(struct intel_engine_cs *engine,
                                    struct drm_printer *m,
                                    void (*show_request)(struct drm_printer *m,
@@ -2772,6 +3387,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
                 show_request(m, last, "\t\tQ ");
         }
  
+       last = NULL;
+       count = 0;
+       for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+               struct virtual_engine *ve =
+                       rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+               struct i915_request *rq = READ_ONCE(ve->request);
+
+               if (rq) {
+                       if (count++ < max - 1)
+                               show_request(m, rq, "\t\tV ");
+                       else
+                               last = rq;
+               }
+       }
+       if (last) {
+               if (count > max) {
+                       drm_printf(m,
+                                  "\t\t...skipping %d virtual requests...\n",
+                                  count - max);
+               }
+               show_request(m, last, "\t\tV ");
+       }
+
         spin_unlock_irqrestore(&engine->timeline.lock, flags);
  }
  
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h

index a0dc907a7249753ba71aec2e553d95a0e737ee50..5530606052e5b5837b0a0aed6ca3dc7cd7e21316 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -114,4 +114,13 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
                                                         const char *prefix),
                                    unsigned int max);
  
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+                              struct intel_engine_cs **siblings,
+                              unsigned int count);
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+                             struct intel_engine_cs *src);
+
  #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c

index 5b3d8e33f1cfdf6cbeb3a6fea47fa1c1e01d7b6a..f880271fb9bad47f1f9ef2bea5d3eea0fcae2184 100644 (file)
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1310,6 +1310,185 @@ err_unlock:
         return err;
  }
  
+static int nop_virtual_engine(struct drm_i915_private *i915,
+                             struct intel_engine_cs **siblings,
+                             unsigned int nsibling,
+                             unsigned int nctx,
+                             unsigned int flags)
+#define CHAIN BIT(0)
+{
+       IGT_TIMEOUT(end_time);
+       struct i915_request *request[16];
+       struct i915_gem_context *ctx[16];
+       struct intel_context *ve[16];
+       unsigned long n, prime, nc;
+       struct igt_live_test t;
+       ktime_t times[2] = {};
+       int err;
+
+       GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+       for (n = 0; n < nctx; n++) {
+               ctx[n] = kernel_context(i915);
+               if (!ctx[n]) {
+                       err = -ENOMEM;
+                       nctx = n;
+                       goto out;
+               }
+
+               ve[n] = intel_execlists_create_virtual(ctx[n],
+                                                      siblings, nsibling);
+               if (IS_ERR(ve[n])) {
+                       kernel_context_close(ctx[n]);
+                       err = PTR_ERR(ve[n]);
+                       nctx = n;
+                       goto out;
+               }
+
+               err = intel_context_pin(ve[n]);
+               if (err) {
+                       intel_context_put(ve[n]);
+                       kernel_context_close(ctx[n]);
+                       nctx = n;
+                       goto out;
+               }
+       }
+
+       err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+       if (err)
+               goto out;
+
+       for_each_prime_number_from(prime, 1, 8192) {
+               times[1] = ktime_get_raw();
+
+               if (flags & CHAIN) {
+                       for (nc = 0; nc < nctx; nc++) {
+                               for (n = 0; n < prime; n++) {
+                                       request[nc] =
+                                               i915_request_create(ve[nc]);
+                                       if (IS_ERR(request[nc])) {
+                                               err = PTR_ERR(request[nc]);
+                                               goto out;
+                                       }
+
+                                       i915_request_add(request[nc]);
+                               }
+                       }
+               } else {
+                       for (n = 0; n < prime; n++) {
+                               for (nc = 0; nc < nctx; nc++) {
+                                       request[nc] =
+                                               i915_request_create(ve[nc]);
+                                       if (IS_ERR(request[nc])) {
+                                               err = PTR_ERR(request[nc]);
+                                               goto out;
+                                       }
+
+                                       i915_request_add(request[nc]);
+                               }
+                       }
+               }
+
+               for (nc = 0; nc < nctx; nc++) {
+                       if (i915_request_wait(request[nc],
+                                             I915_WAIT_LOCKED,
+                                             HZ / 10) < 0) {
+                               pr_err("%s(%s): wait for %llx:%lld timed out\n",
+                                      __func__, ve[0]->engine->name,
+                                      request[nc]->fence.context,
+                                      request[nc]->fence.seqno);
+
+                               GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+                                         __func__, ve[0]->engine->name,
+                                         request[nc]->fence.context,
+                                         request[nc]->fence.seqno);
+                               GEM_TRACE_DUMP();
+                               i915_gem_set_wedged(i915);
+                               break;
+                       }
+               }
+
+               times[1] = ktime_sub(ktime_get_raw(), times[1]);
+               if (prime == 1)
+                       times[0] = times[1];
+
+               if (__igt_timeout(end_time, NULL))
+                       break;
+       }
+
+       err = igt_live_test_end(&t);
+       if (err)
+               goto out;
+
+       pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+               nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
+               prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+       if (igt_flush_test(i915, I915_WAIT_LOCKED))
+               err = -EIO;
+
+       for (nc = 0; nc < nctx; nc++) {
+               intel_context_unpin(ve[nc]);
+               intel_context_put(ve[nc]);
+               kernel_context_close(ctx[nc]);
+       }
+       return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       unsigned int class, inst;
+       int err = -ENODEV;
+
+       if (USES_GUC_SUBMISSION(i915))
+               return 0;
+
+       mutex_lock(&i915->drm.struct_mutex);
+
+       for_each_engine(engine, i915, id) {
+               err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+               if (err) {
+                       pr_err("Failed to wrap engine %s: err=%d\n",
+                              engine->name, err);
+                       goto out_unlock;
+               }
+       }
+
+       for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+               int nsibling, n;
+
+               nsibling = 0;
+               for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+                       if (!i915->engine_class[class][inst])
+                               continue;
+
+                       siblings[nsibling++] = i915->engine_class[class][inst];
+               }
+               if (nsibling < 2)
+                       continue;
+
+               for (n = 1; n <= nsibling + 1; n++) {
+                       err = nop_virtual_engine(i915, siblings, nsibling,
+                                                n, 0);
+                       if (err)
+                               goto out_unlock;
+               }
+
+               err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+               if (err)
+                       goto out_unlock;
+       }
+
+out_unlock:
+       mutex_unlock(&i915->drm.struct_mutex);
+       return err;
+}
+
  int intel_execlists_live_selftests(struct drm_i915_private *i915)
  {
         static const struct i915_subtest tests[] = {
@@ -1322,6 +1501,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
                 SUBTEST(live_chain_preempt),
                 SUBTEST(live_preempt_hang),
                 SUBTEST(live_preempt_smoke),
+               SUBTEST(live_virtual_engine),
         };
  
         if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h

index 67f8a4a807a011c423fe4e28c5e1d340067abda5..fe82d3571072b0bbdcba564f0af6e0ea53b4018f 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -91,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t)
         return atomic_dec_and_test(&t->count);
  }
  
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+       return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
  #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c

index 24736fcd463d94816d2d72d718e3c8cf93ac9b66..68db6b28e6067923ea647f430ed074c89a740b20 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,6 +86,7 @@
   */
  
  #include <linux/log2.h>
+#include <linux/nospec.h>
  
  #include <drm/i915_drm.h>
  
@@ -1218,7 +1219,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce,
         int ret;
  
         GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
-       GEM_BUG_ON(ce->engine->id != RCS0);
  
         ret = intel_context_lock_pinned(ce);
         if (ret)
@@ -1412,7 +1412,100 @@ struct set_engines {
         struct i915_gem_engines *engines;
  };
  
+static int
+set_engines__load_balance(struct i915_user_extension __user *base, void *data)
+{
+       struct i915_context_engines_load_balance __user *ext =
+               container_of_user(base, typeof(*ext), base);
+       const struct set_engines *set = data;
+       struct intel_engine_cs *stack[16];
+       struct intel_engine_cs **siblings;
+       struct intel_context *ce;
+       u16 num_siblings, idx;
+       unsigned int n;
+       int err;
+
+       if (!HAS_EXECLISTS(set->ctx->i915))
+               return -ENODEV;
+
+       if (USES_GUC_SUBMISSION(set->ctx->i915))
+               return -ENODEV; /* not implement yet */
+
+       if (get_user(idx, &ext->engine_index))
+               return -EFAULT;
+
+       if (idx >= set->engines->num_engines) {
+               DRM_DEBUG("Invalid placement value, %d >= %d\n",
+                         idx, set->engines->num_engines);
+               return -EINVAL;
+       }
+
+       idx = array_index_nospec(idx, set->engines->num_engines);
+       if (set->engines->engines[idx]) {
+               DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
+               return -EEXIST;
+       }
+
+       if (get_user(num_siblings, &ext->num_siblings))
+               return -EFAULT;
+
+       err = check_user_mbz(&ext->flags);
+       if (err)
+               return err;
+
+       err = check_user_mbz(&ext->mbz64);
+       if (err)
+               return err;
+
+       siblings = stack;
+       if (num_siblings > ARRAY_SIZE(stack)) {
+               siblings = kmalloc_array(num_siblings,
+                                        sizeof(*siblings),
+                                        GFP_KERNEL);
+               if (!siblings)
+                       return -ENOMEM;
+       }
+
+       for (n = 0; n < num_siblings; n++) {
+               struct i915_engine_class_instance ci;
+
+               if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+                       err = -EFAULT;
+                       goto out_siblings;
+               }
+
+               siblings[n] = intel_engine_lookup_user(set->ctx->i915,
+                                                      ci.engine_class,
+                                                      ci.engine_instance);
+               if (!siblings[n]) {
+                       DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
+                                 n, ci.engine_class, ci.engine_instance);
+                       err = -EINVAL;
+                       goto out_siblings;
+               }
+       }
+
+       ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+       if (IS_ERR(ce)) {
+               err = PTR_ERR(ce);
+               goto out_siblings;
+       }
+
+       if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
+               intel_context_put(ce);
+               err = -EEXIST;
+               goto out_siblings;
+       }
+
+out_siblings:
+       if (siblings != stack)
+               kfree(siblings);
+
+       return err;
+}
+
  static const i915_user_extension_fn set_engines__extensions[] = {
+       [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
  };
  
  static int
@@ -1737,14 +1830,29 @@ static int clone_engines(struct i915_gem_context *dst,
  
         clone->i915 = dst->i915;
         for (n = 0; n < e->num_engines; n++) {
+               struct intel_engine_cs *engine;
+
                 if (!e->engines[n]) {
                         clone->engines[n] = NULL;
                         continue;
                 }
+               engine = e->engines[n]->engine;
  
-               clone->engines[n] =
-                       intel_context_create(dst, e->engines[n]->engine);
-               if (!clone->engines[n]) {
+               /*
+                * Virtual engines are singletons; they can only exist
+                * inside a single context, because they embed their
+                * HW context... As each virtual context implies a single
+                * timeline (each engine can only dequeue a single request
+                * at any time), it would be surprising for two contexts
+                * to use the same engine. So let's create a copy of
+                * the virtual engine instead.
+                */
+               if (intel_engine_is_virtual(engine))
+                       clone->engines[n] =
+                               intel_execlists_clone_virtual(dst, engine);
+               else
+                       clone->engines[n] = intel_context_create(dst, engine);
+               if (IS_ERR_OR_NULL(clone->engines[n])) {
                         __free_engines(clone, n);
                         goto err_unlock;
                 }
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c

index d215dcdf0b1eac2a3a0ee5920b76b69611307594..78ceb56d78017fac77b6484608ff21a7e9bc46bf 100644 (file)
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -150,17 +150,26 @@ sched_lock_engine(const struct i915_sched_node *node,
                   struct intel_engine_cs *locked,
                   struct sched_cache *cache)
  {
-       struct intel_engine_cs *engine = node_to_request(node)->engine;
+       const struct i915_request *rq = node_to_request(node);
+       struct intel_engine_cs *engine;
  
         GEM_BUG_ON(!locked);
  
-       if (engine != locked) {
+       /*
+        * Virtual engines complicate acquiring the engine timeline lock,
+        * as their rq->engine pointer is not stable until under that
+        * engine lock. The simple ploy we use is to take the lock then
+        * check that the rq still belongs to the newly locked engine.
+        */
+       while (locked != (engine = READ_ONCE(rq->engine))) {
                 spin_unlock(&locked->timeline.lock);
                 memset(cache, 0, sizeof(*cache));
                 spin_lock(&engine->timeline.lock);
+               locked = engine;
         }
  
-       return engine;
+       GEM_BUG_ON(locked != engine);
+       return locked;
  }
  
  static inline int rq_prio(const struct i915_request *rq)
@@ -272,6 +281,7 @@ static void __i915_schedule(struct i915_sched_node *node,
         spin_lock(&engine->timeline.lock);
  
         /* Fifo and depth-first replacement ensure our deps execute before us */
+       engine = sched_lock_engine(node, engine, &cache);
         list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
                 INIT_LIST_HEAD(&dep->dfs_link);
  
@@ -283,8 +293,11 @@ static void __i915_schedule(struct i915_sched_node *node,
                 if (prio <= node->attr.priority || node_signaled(node))
                         continue;
  
+               GEM_BUG_ON(node_to_request(node)->engine != engine);
+
                 node->attr.priority = prio;
                 if (!list_empty(&node->link)) {
+                       GEM_BUG_ON(intel_engine_is_virtual(engine));
                         if (!cache.priolist)
                                 cache.priolist =
                                         i915_sched_lookup_priolist(engine,
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h

index 5256a0b5c5f77b005199f9d5a2b3e09100ab1070..1688705f4a2b7633db520f40f71c82cd10347ba6 100644 (file)
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -26,6 +26,7 @@ struct i915_timeline {
         spinlock_t lock;
  #define TIMELINE_CLIENT 0 /* default subclass */
  #define TIMELINE_ENGINE 1
+#define TIMELINE_VIRTUAL 2
         struct mutex mutex; /* protects the flow of requests */
  
         unsigned int pin_count;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h

index 62396d575e28a8e42d105e2ee0923178b2646469..f9770948161c39b1ec540ad13d0c26a06f3d15a0 100644 (file)
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -137,6 +137,7 @@ struct i915_engine_class_instance {
         __u16 engine_class; /* see enum drm_i915_gem_engine_class */
         __u16 engine_instance;
  #define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
  };
  
  /**
@@ -1608,8 +1609,46 @@ struct drm_i915_gem_context_param_sseu {
         __u32 rsvd;
  };
  
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+       struct i915_user_extension base;
+
+       __u16 engine_index;
+       __u16 num_siblings;
+       __u32 flags; /* all undefined flags must be zero */
+
+       __u64 mbz64; /* reserved for future use; must be zero */
+
+       struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
+       struct i915_user_extension base; \
+       __u16 engine_index; \
+       __u16 num_siblings; \
+       __u32 flags; \
+       __u64 mbz64; \
+       struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
  struct i915_context_param_engines {
         __u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
         struct i915_engine_class_instance engines[0];
  } __attribute__((packed));
author	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 21 May 2019 21:11:30 +0000 (22:11 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Wed, 22 May 2019 07:40:38 +0000 (08:40 +0100)
drivers/gpu/drm/i915/gt/intel_engine_types.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_lrc.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/selftest_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem_context.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_scheduler.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_timeline_types.h		patch \| blob \| history
include/uapi/drm/i915_drm.h		patch \| blob \| history