drm/i915/execlists: Suppress mere WAIT preemption
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 28 Feb 2019 22:06:39 +0000 (22:06 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 28 Feb 2019 23:10:43 +0000 (23:10 +0000)
WAIT is occasionally suppressed by virtue of preempted requests being
promoted to NEWCLIENT if they have not already received that boost.
Make this consistent for all WAIT boosts: they are not allowed to
preempt executing contexts and are merely granted the right to be at the
front of the queue for the next execution slot. This is in keeping with
the desire that the WAIT boost be a minor tweak that does not give
excessive promotion to its user and open ourselves to trivial abuse.

The problem with the inconsistent WAIT preemption becomes more apparent
as the preemption is propagated across the engines, where one engine may
preempt and the other not, and we may be relying on the exact execution
order being consistent across engines (e.g. using HW semaphores to
coordinate parallel execution).

v2: Also protect GuC submission from false preemption loops.
v3: Build bug safeguards and better debug messages for st.
v4: Do the priority bumping in unsubmit (i.e. on preemption/reset
unwind); applying it earlier during submit causes out-of-order execution
when combined with execute fences.
v5: Call sw_fence_fini for our dummy request (Matthew)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190228220639.3173-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_scheduler.c
drivers/gpu/drm/i915/i915_scheduler.h
drivers/gpu/drm/i915/intel_guc_submission.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/selftests/intel_lrc.c

index a011bf4be48ef3c9a49f50f8f772ed1c18ab9ce4..c65f6c990fddc99194c6158c97c9be38974099dc 100644 (file)
@@ -358,11 +358,14 @@ void __i915_request_submit(struct i915_request *request)
 
        /* We may be recursing from the signal callback of another i915 fence */
        spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
        GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
        set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
            !i915_request_enable_breadcrumb(request))
                intel_engine_queue_breadcrumbs(engine);
+
        spin_unlock(&request->lock);
 
        engine->emit_fini_breadcrumb(request,
@@ -406,10 +409,22 @@ void __i915_request_unsubmit(struct i915_request *request)
 
        /* We may be recursing from the signal callback of another i915 fence */
        spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
+       /*
+        * As we do not allow WAIT to preempt inflight requests,
+        * once we have executed a request, along with triggering
+        * any execution callbacks, we must preserve its ordering
+        * within the non-preemptible FIFO.
+        */
+       BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
+       request->sched.attr.priority |= __NO_PREEMPTION;
+
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
                i915_request_cancel_breadcrumb(request);
+
        GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
        clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+
        spin_unlock(&request->lock);
 
        /* Transfer back from the global per-engine timeline to per-context */
index 0dd720593f9ce502e80753eff364adf741d378e8..50018ad302334f27a965cce215547925f753f241 100644 (file)
@@ -324,7 +324,6 @@ static void __i915_schedule(struct i915_request *rq,
                        if (node_signaled(p->signaler))
                                continue;
 
-                       GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
                        if (prio > READ_ONCE(p->signaler->attr.priority))
                                list_move_tail(&p->dfs_link, &dfs);
                }
index bb3d496d4c49ef737d204ec905b34ccaf7da3f84..7d4a49750d92e7e13fb35fa871be2b2692f8c542 100644 (file)
@@ -33,6 +33,8 @@ enum {
 #define I915_PRIORITY_WAIT     ((u8)BIT(0))
 #define I915_PRIORITY_NEWCLIENT        ((u8)BIT(1))
 
+#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
+
 struct i915_sched_attr {
        /**
         * @priority: execution and service priority
index 4366db7978a83453402126a4e5c52c80ffe1020e..56ba2fcbabe6ddf2f464459f6c48b509df074282 100644 (file)
@@ -720,7 +720,7 @@ static inline int rq_prio(const struct i915_request *rq)
 
 static inline int port_prio(const struct execlist_port *port)
 {
-       return rq_prio(port_request(port));
+       return rq_prio(port_request(port)) | __NO_PREEMPTION;
 }
 
 static bool __guc_dequeue(struct intel_engine_cs *engine)
index 661d2f6da84f9a797a259624c7afeb3b59ba0404..4f2187aa44e49b15968814297873923d0ee7eb57 100644 (file)
@@ -188,6 +188,12 @@ static inline int rq_prio(const struct i915_request *rq)
        return rq->sched.attr.priority;
 }
 
+static int effective_prio(const struct i915_request *rq)
+{
+       /* Restrict mere WAIT boosts from triggering preemption */
+       return rq_prio(rq) | __NO_PREEMPTION;
+}
+
 static int queue_prio(const struct intel_engine_execlists *execlists)
 {
        struct i915_priolist *p;
@@ -208,7 +214,7 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 static inline bool need_preempt(const struct intel_engine_cs *engine,
                                const struct i915_request *rq)
 {
-       const int last_prio = rq_prio(rq);
+       int last_prio;
 
        if (!intel_engine_has_preemption(engine))
                return false;
@@ -228,6 +234,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
         * preempt. If that hint is stale or we may be trying to preempt
         * ourselves, ignore the request.
         */
+       last_prio = effective_prio(rq);
        if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
                                      last_prio))
                return false;
index 0f7a5bf696468383d96031bddfb9a5cc20d75580..0677038a546694a1f79a3a71aeeb82ff7ca5e24c 100644 (file)
@@ -407,6 +407,168 @@ err_wedged:
        goto err_client_b;
 }
 
+static int __i915_sw_fence_call
+dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+       return NOTIFY_DONE;
+}
+
+static struct i915_request *dummy_request(struct intel_engine_cs *engine)
+{
+       struct i915_request *rq;
+
+       rq = kzalloc(sizeof(*rq), GFP_KERNEL);
+       if (!rq)
+               return NULL;
+
+       INIT_LIST_HEAD(&rq->active_list);
+       rq->engine = engine;
+
+       i915_sched_node_init(&rq->sched);
+
+       /* mark this request as permanently incomplete */
+       rq->fence.seqno = 1;
+       BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
+       rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
+       GEM_BUG_ON(i915_request_completed(rq));
+
+       i915_sw_fence_init(&rq->submit, dummy_notify);
+       i915_sw_fence_commit(&rq->submit);
+
+       return rq;
+}
+
+static void dummy_request_free(struct i915_request *dummy)
+{
+       i915_request_mark_complete(dummy);
+       i915_sched_node_fini(&dummy->sched);
+       i915_sw_fence_fini(&dummy->submit);
+
+       dma_fence_free(&dummy->fence);
+}
+
+static int live_suppress_wait_preempt(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct preempt_client client[4];
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       intel_wakeref_t wakeref;
+       int err = -ENOMEM;
+       int i;
+
+       /*
+        * Waiters are given a little priority nudge, but not enough
+        * to actually cause any preemption. Double check that we do
+        * not needlessly generate preempt-to-idle cycles.
+        */
+
+       if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+               return 0;
+
+       mutex_lock(&i915->drm.struct_mutex);
+       wakeref = intel_runtime_pm_get(i915);
+
+       if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
+               goto err_unlock;
+       if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+               goto err_client_0;
+       if (preempt_client_init(i915, &client[2])) /* head of queue */
+               goto err_client_1;
+       if (preempt_client_init(i915, &client[3])) /* bystander */
+               goto err_client_2;
+
+       for_each_engine(engine, i915, id) {
+               int depth;
+
+               if (!engine->emit_init_breadcrumb)
+                       continue;
+
+               for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
+                       struct i915_request *rq[ARRAY_SIZE(client)];
+                       struct i915_request *dummy;
+
+                       engine->execlists.preempt_hang.count = 0;
+
+                       dummy = dummy_request(engine);
+                       if (!dummy)
+                               goto err_client_3;
+
+                       for (i = 0; i < ARRAY_SIZE(client); i++) {
+                               rq[i] = igt_spinner_create_request(&client[i].spin,
+                                                                  client[i].ctx, engine,
+                                                                  MI_NOOP);
+                               if (IS_ERR(rq[i])) {
+                                       err = PTR_ERR(rq[i]);
+                                       goto err_wedged;
+                               }
+
+                               /* Disable NEWCLIENT promotion */
+                               __i915_active_request_set(&rq[i]->timeline->last_request,
+                                                         dummy);
+                               i915_request_add(rq[i]);
+                       }
+
+                       dummy_request_free(dummy);
+
+                       GEM_BUG_ON(i915_request_completed(rq[0]));
+                       if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
+                               pr_err("%s: First client failed to start\n",
+                                      engine->name);
+                               goto err_wedged;
+                       }
+                       GEM_BUG_ON(!i915_request_started(rq[0]));
+
+                       if (i915_request_wait(rq[depth],
+                                             I915_WAIT_LOCKED |
+                                             I915_WAIT_PRIORITY,
+                                             1) != -ETIME) {
+                               pr_err("%s: Waiter depth:%d completed!\n",
+                                      engine->name, depth);
+                               goto err_wedged;
+                       }
+
+                       for (i = 0; i < ARRAY_SIZE(client); i++)
+                               igt_spinner_end(&client[i].spin);
+
+                       if (igt_flush_test(i915, I915_WAIT_LOCKED))
+                               goto err_wedged;
+
+                       if (engine->execlists.preempt_hang.count) {
+                               pr_err("%s: Preemption recorded x%d, depth %d; should have been suppressed!\n",
+                                      engine->name,
+                                      engine->execlists.preempt_hang.count,
+                                      depth);
+                               err = -EINVAL;
+                               goto err_client_3;
+                       }
+               }
+       }
+
+       err = 0;
+err_client_3:
+       preempt_client_fini(&client[3]);
+err_client_2:
+       preempt_client_fini(&client[2]);
+err_client_1:
+       preempt_client_fini(&client[1]);
+err_client_0:
+       preempt_client_fini(&client[0]);
+err_unlock:
+       if (igt_flush_test(i915, I915_WAIT_LOCKED))
+               err = -EIO;
+       intel_runtime_pm_put(i915, wakeref);
+       mutex_unlock(&i915->drm.struct_mutex);
+       return err;
+
+err_wedged:
+       for (i = 0; i < ARRAY_SIZE(client); i++)
+               igt_spinner_end(&client[i].spin);
+       i915_gem_set_wedged(i915);
+       err = -EIO;
+       goto err_client_3;
+}
+
 static int live_chain_preempt(void *arg)
 {
        struct drm_i915_private *i915 = arg;
@@ -887,6 +1049,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
                SUBTEST(live_preempt),
                SUBTEST(live_late_preempt),
                SUBTEST(live_suppress_self_preempt),
+               SUBTEST(live_suppress_wait_preempt),
                SUBTEST(live_chain_preempt),
                SUBTEST(live_preempt_hang),
                SUBTEST(live_preempt_smoke),