drm/i915/execlists: Preempt-to-busy
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 20 Jun 2019 14:20:51 +0000 (15:20 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 20 Jun 2019 15:52:36 +0000 (16:52 +0100)
When using a global seqno, we required a precise stop-the-world event to
handle preemption and unwind the global seqno counter. To accomplish
this, we would preempt to a special out-of-band context and wait for the
machine to report that it was idle. Given an idle machine, we could very
precisely see which requests had completed and which we needed to feed
back into the run queue.

However, now that we have scrapped the global seqno, we no longer need
to precisely unwind the global counter and only track requests by their
per-context seqno. This allows us to loosely unwind inflight requests
while scheduling a preemption, with the enormous caveat that the
requests we put back on the run queue are still _inflight_ (until the
preemption request is complete). This makes request tracking much
messier, as at any point we may then see a completed request that we
believe is not currently scheduled for execution. We also have to be
careful not to rewind RING_TAIL past RING_HEAD on preempting to the
running context, and for this we use a semaphore to prevent completion
of the request before continuing.

To accomplish this feat, we change how we track requests scheduled to
the HW. Instead of appending our requests onto a single list as we
submit, we track each submission to ELSP as its own block. Then upon
receiving the CS preemption event, we promote the pending block to the
inflight block (discarding what was previously being tracked). As normal
CS completion events arrive, we then remove stale entries from the
inflight tracker.

v2: Be a tinge paranoid and ensure we flush the write into the HWS page
for the GPU semaphore to pick up in a timely fashion.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190620142052.19311-1-chris@chris-wilson.co.uk
13 files changed:
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gt/intel_context_types.h
drivers/gpu/drm/i915/gt/intel_engine.h
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/i915_scheduler.c
drivers/gpu/drm/i915/i915_utils.h
drivers/gpu/drm/i915/intel_guc_submission.c
drivers/gpu/drm/i915/selftests/i915_request.c

index 0f2c22a3bcb6a20f615877a4cfdd4aa0ab202434..35871c8a42a61be58be82fe3b0b0ffa4cd65997b 100644 (file)
@@ -646,7 +646,7 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-       return HAS_EXECLISTS(i915);
+       return USES_GUC_SUBMISSION(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
index 08049ee91ceeac210222941a64327d1bed3f9f03..4c0e211c715d14b0133dbdae097a93aa3df4ed3c 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "i915_utils.h"
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
 
@@ -38,6 +39,10 @@ struct intel_context {
        struct i915_gem_context *gem_context;
        struct intel_engine_cs *engine;
        struct intel_engine_cs *inflight;
+#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
+#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
+#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
+#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
 
        struct list_head signal_link;
        struct list_head signals;
index 2f1c6871ee95718549c40f7950075dbde1759de5..9bb6ff76680e2e4c97670bb5480ef7138cd2b910 100644 (file)
@@ -125,71 +125,26 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
-static inline void
-execlists_set_active(struct intel_engine_execlists *execlists,
-                    unsigned int bit)
-{
-       __set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline bool
-execlists_set_active_once(struct intel_engine_execlists *execlists,
-                         unsigned int bit)
-{
-       return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_active(struct intel_engine_execlists *execlists,
-                      unsigned int bit)
-{
-       __clear_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_all_active(struct intel_engine_execlists *execlists)
+static inline unsigned int
+execlists_num_ports(const struct intel_engine_execlists * const execlists)
 {
-       execlists->active = 0;
+       return execlists->port_mask + 1;
 }
 
-static inline bool
-execlists_is_active(const struct intel_engine_execlists *execlists,
-                   unsigned int bit)
+static inline struct i915_request *
+execlists_active(const struct intel_engine_execlists *execlists)
 {
-       return test_bit(bit, (unsigned long *)&execlists->active);
+       GEM_BUG_ON(execlists->active - execlists->inflight >
+                  execlists_num_ports(execlists));
+       return READ_ONCE(*execlists->active);
 }
 
-void execlists_user_begin(struct intel_engine_execlists *execlists,
-                         const struct execlist_port *port);
-void execlists_user_end(struct intel_engine_execlists *execlists);
-
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
 
 struct i915_request *
 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
 
-static inline unsigned int
-execlists_num_ports(const struct intel_engine_execlists * const execlists)
-{
-       return execlists->port_mask + 1;
-}
-
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
-                       struct execlist_port * const port)
-{
-       const unsigned int m = execlists->port_mask;
-
-       GEM_BUG_ON(port_index(port, execlists) != 0);
-       GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-
-       memmove(port, port + 1, m * sizeof(struct execlist_port));
-       memset(port + m, 0, sizeof(struct execlist_port));
-
-       return port;
-}
-
 static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
index 7fd33e81c2d97dad11122b94d1bc515a03085725..d45328e254dc9489f550b3acbe7741ae3a92e0d5 100644 (file)
@@ -508,6 +508,10 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
        GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
        GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+       memset(execlists->pending, 0, sizeof(execlists->pending));
+       execlists->active =
+               memset(execlists->inflight, 0, sizeof(execlists->inflight));
+
        execlists->queue_priority_hint = INT_MIN;
        execlists->queue = RB_ROOT_CACHED;
 }
@@ -1152,7 +1156,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
                return true;
 
        /* Waiting to drain ELSP? */
-       if (READ_ONCE(engine->execlists.active)) {
+       if (execlists_active(&engine->execlists)) {
                struct tasklet_struct *t = &engine->execlists.tasklet;
 
                synchronize_hardirq(engine->i915->drm.irq);
@@ -1169,7 +1173,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
                /* Otherwise flush the tasklet if it was on another cpu */
                tasklet_unlock_wait(t);
 
-               if (READ_ONCE(engine->execlists.active))
+               if (execlists_active(&engine->execlists))
                        return false;
        }
 
@@ -1367,6 +1371,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
        }
 
        if (HAS_EXECLISTS(dev_priv)) {
+               struct i915_request * const *port, *rq;
                const u32 *hws =
                        &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
                const u8 num_entries = execlists->csb_size;
@@ -1399,27 +1404,33 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
                }
 
                spin_lock_irqsave(&engine->active.lock, flags);
-               for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
-                       struct i915_request *rq;
-                       unsigned int count;
+               for (port = execlists->active; (rq = *port); port++) {
+                       char hdr[80];
+                       int len;
+
+                       len = snprintf(hdr, sizeof(hdr),
+                                      "\t\tActive[%d: ",
+                                      (int)(port - execlists->active));
+                       if (!i915_request_signaled(rq))
+                               len += snprintf(hdr + len, sizeof(hdr) - len,
+                                               "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
+                                               i915_ggtt_offset(rq->ring->vma),
+                                               rq->timeline->hwsp_offset,
+                                               hwsp_seqno(rq));
+                       snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+                       print_request(m, rq, hdr);
+               }
+               for (port = execlists->pending; (rq = *port); port++) {
                        char hdr[80];
 
-                       rq = port_unpack(&execlists->port[idx], &count);
-                       if (!rq) {
-                               drm_printf(m, "\t\tELSP[%d] idle\n", idx);
-                       } else if (!i915_request_signaled(rq)) {
-                               snprintf(hdr, sizeof(hdr),
-                                        "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
-                                        idx, count,
-                                        i915_ggtt_offset(rq->ring->vma),
-                                        rq->timeline->hwsp_offset,
-                                        hwsp_seqno(rq));
-                               print_request(m, rq, hdr);
-                       } else {
-                               print_request(m, rq, "\t\tELSP[%d] rq: ");
-                       }
+                       snprintf(hdr, sizeof(hdr),
+                                "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+                                (int)(port - execlists->pending),
+                                i915_ggtt_offset(rq->ring->vma),
+                                rq->timeline->hwsp_offset,
+                                hwsp_seqno(rq));
+                       print_request(m, rq, hdr);
                }
-               drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
                spin_unlock_irqrestore(&engine->active.lock, flags);
        } else if (INTEL_GEN(dev_priv) > 6) {
                drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
@@ -1583,15 +1594,19 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
        }
 
        if (engine->stats.enabled++ == 0) {
-               const struct execlist_port *port = execlists->port;
-               unsigned int num_ports = execlists_num_ports(execlists);
+               struct i915_request * const *port;
+               struct i915_request *rq;
 
                engine->stats.enabled_at = ktime_get();
 
                /* XXX submission method oblivious? */
-               while (num_ports-- && port_isset(port)) {
+               for (port = execlists->active; (rq = *port); port++)
                        engine->stats.active++;
-                       port++;
+
+               for (port = execlists->pending; (rq = *port); port++) {
+                       /* Exclude any contexts already counted in active */
+                       if (intel_context_inflight_count(rq->hw_context) == 1)
+                               engine->stats.active++;
                }
 
                if (engine->stats.active)
index 43e975a26016bf75038a044c55e222aefcf8ff2b..b4f7b81a3c3e207db02cfc29c598752a1365fa2a 100644 (file)
@@ -172,51 +172,28 @@ struct intel_engine_execlists {
         */
        u32 __iomem *ctrl_reg;
 
+#define EXECLIST_MAX_PORTS 2
+       /**
+        * @active: the currently known context executing on HW
+        */
+       struct i915_request * const *active;
        /**
-        * @port: execlist port states
+        * @inflight: the set of contexts submitted and acknowleged by HW
         *
-        * For each hardware ELSP (ExecList Submission Port) we keep
-        * track of the last request and the number of times we submitted
-        * that port to hw. We then count the number of times the hw reports
-        * a context completion or preemption. As only one context can
-        * be active on hw, we limit resubmission of context to port[0]. This
-        * is called Lite Restore, of the context.
+        * The set of inflight contexts is managed by reading CS events
+        * from the HW. On a context-switch event (not preemption), we
+        * know the HW has transitioned from port0 to port1, and we
+        * advance our inflight/active tracking accordingly.
         */
-       struct execlist_port {
-               /**
-                * @request_count: combined request and submission count
-                */
-               struct i915_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, execlists) ((p) - (execlists)->port)
-
-               /**
-                * @context_id: context ID for port
-                */
-               GEM_DEBUG_DECL(u32 context_id);
-
-#define EXECLIST_MAX_PORTS 2
-       } port[EXECLIST_MAX_PORTS];
-
+       struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
        /**
-        * @active: is the HW active? We consider the HW as active after
-        * submitting any context for execution and until we have seen the
-        * last context completion event. After that, we do not expect any
-        * more events until we submit, and so can park the HW.
+        * @pending: the next set of contexts submitted to ELSP
         *
-        * As we have a small number of different sources from which we feed
-        * the HW, we track the state of each inside a single bitfield.
+        * We store the array of contexts that we submit to HW (via ELSP) and
+        * promote them to the inflight array once HW has signaled the
+        * preemption or idle-to-active event.
         */
-       unsigned int active;
-#define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
+       struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
 
        /**
         * @port_mask: number of execlist ports - 1
@@ -257,11 +234,6 @@ struct intel_engine_execlists {
         */
        u32 *csb_status;
 
-       /**
-        * @preempt_complete_status: expected CSB upon completing preemption
-        */
-       u32 preempt_complete_status;
-
        /**
         * @csb_size: context status buffer FIFO size
         */
index 82b7ace62d97ec13f110b4454115ae313d0791e0..cb9d285bd00a34ea558fc83fa76abf5f6bddc781 100644 (file)
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
         (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 #define WA_TAIL_DWORDS 2
@@ -221,6 +223,25 @@ static void execlists_init_reg_state(u32 *reg_state,
                                     struct intel_engine_cs *engine,
                                     struct intel_ring *ring);
 
+static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+{
+       return (i915_ggtt_offset(engine->status_page.vma) +
+               I915_GEM_HWS_PREEMPT_ADDR);
+}
+
+static inline void
+ring_set_paused(const struct intel_engine_cs *engine, int state)
+{
+       /*
+        * We inspect HWS_PREEMPT with a semaphore inside
+        * engine->emit_fini_breadcrumb. If the dword is true,
+        * the ring is paused as the semaphore will busywait
+        * until the dword is false.
+        */
+       engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
+       wmb();
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
        return rb_entry(rb, struct i915_priolist, node);
@@ -271,12 +292,6 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 {
        int last_prio;
 
-       if (!engine->preempt_context)
-               return false;
-
-       if (i915_request_completed(rq))
-               return false;
-
        /*
         * Check if the current priority hint merits a preemption attempt.
         *
@@ -338,9 +353,6 @@ __maybe_unused static inline bool
 assert_priority_queue(const struct i915_request *prev,
                      const struct i915_request *next)
 {
-       const struct intel_engine_execlists *execlists =
-               &prev->engine->execlists;
-
        /*
         * Without preemption, the prev may refer to the still active element
         * which we refuse to let go.
@@ -348,7 +360,7 @@ assert_priority_queue(const struct i915_request *prev,
         * Even with preemption, there are times when we think it is better not
         * to preempt and leave an ostensibly lower priority request in flight.
         */
-       if (port_request(execlists->port) == prev)
+       if (i915_request_is_active(prev))
                return true;
 
        return rq_prio(prev) >= rq_prio(next);
@@ -442,13 +454,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
                struct intel_engine_cs *owner;
 
                if (i915_request_completed(rq))
-                       break;
+                       continue; /* XXX */
 
                __i915_request_unsubmit(rq);
                unwind_wa_tail(rq);
 
-               GEM_BUG_ON(rq->hw_context->inflight);
-
                /*
                 * Push the request back into the queue for later resubmission.
                 * If this request is not native to this physical engine (i.e.
@@ -500,32 +510,32 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
                                   status, rq);
 }
 
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
-                    const struct execlist_port *port)
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
 {
-       execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
-}
+       struct intel_context *ce = rq->hw_context;
+       int count;
 
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
-{
-       execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
-}
+       trace_i915_request_in(rq, idx);
 
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
-{
-       GEM_BUG_ON(rq->hw_context->inflight);
+       count = intel_context_inflight_count(ce);
+       if (!count) {
+               intel_context_get(ce);
+               ce->inflight = rq->engine;
+
+               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+               intel_engine_context_in(ce->inflight);
+       }
+
+       intel_context_inflight_inc(ce);
+       GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
 
-       execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-       intel_engine_context_in(rq->engine);
-       rq->hw_context->inflight = rq->engine;
+       return i915_request_get(rq);
 }
 
-static void kick_siblings(struct i915_request *rq)
+static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 {
-       struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+       struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
        struct i915_request *next = READ_ONCE(ve->request);
 
        if (next && next->execution_mask & ~rq->execution_mask)
@@ -533,29 +543,42 @@ static void kick_siblings(struct i915_request *rq)
 }
 
 static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+execlists_schedule_out(struct i915_request *rq)
 {
-       rq->hw_context->inflight = NULL;
-       intel_engine_context_out(rq->engine);
-       execlists_context_status_change(rq, status);
+       struct intel_context *ce = rq->hw_context;
+
+       GEM_BUG_ON(!intel_context_inflight_count(ce));
+
        trace_i915_request_out(rq);
 
-       /*
-        * If this is part of a virtual engine, its next request may have
-        * been blocked waiting for access to the active context. We have
-        * to kick all the siblings again in case we need to switch (e.g.
-        * the next request is not runnable on this engine). Hopefully,
-        * we will already have submitted the next request before the
-        * tasklet runs and do not need to rebuild each virtual tree
-        * and kick everyone again.
-        */
-       if (rq->engine != rq->hw_context->engine)
-               kick_siblings(rq);
+       intel_context_inflight_dec(ce);
+       if (!intel_context_inflight_count(ce)) {
+               intel_engine_context_out(ce->inflight);
+               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+               ce->inflight = NULL;
+               intel_context_put(ce);
+
+               /*
+                * If this is part of a virtual engine, its next request may
+                * have been blocked waiting for access to the active context.
+                * We have to kick all the siblings again in case we need to
+                * switch (e.g. the next request is not runnable on this
+                * engine). Hopefully, we will already have submitted the next
+                * request before the tasklet runs and do not need to rebuild
+                * each virtual tree and kick everyone again.
+                */
+               if (rq->engine != ce->engine)
+                       kick_siblings(rq, ce);
+       }
+
+       i915_request_put(rq);
 }
 
-static u64 execlists_update_context(struct i915_request *rq)
+static u64 execlists_update_context(const struct i915_request *rq)
 {
        struct intel_context *ce = rq->hw_context;
+       u64 desc;
 
        ce->lrc_reg_state[CTX_RING_TAIL + 1] =
                intel_ring_set_tail(rq->ring, rq->tail);
@@ -576,7 +599,11 @@ static u64 execlists_update_context(struct i915_request *rq)
         * wmb).
         */
        mb();
-       return ce->lrc_desc;
+
+       desc = ce->lrc_desc;
+       ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+
+       return desc;
 }
 
 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
@@ -590,12 +617,62 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
        }
 }
 
+static __maybe_unused void
+trace_ports(const struct intel_engine_execlists *execlists,
+           const char *msg,
+           struct i915_request * const *ports)
+{
+       const struct intel_engine_cs *engine =
+               container_of(execlists, typeof(*engine), execlists);
+
+       GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
+                 engine->name, msg,
+                 ports[0]->fence.context,
+                 ports[0]->fence.seqno,
+                 i915_request_completed(ports[0]) ? "!" :
+                 i915_request_started(ports[0]) ? "*" :
+                 "",
+                 ports[1] ? ports[1]->fence.context : 0,
+                 ports[1] ? ports[1]->fence.seqno : 0);
+}
+
+static __maybe_unused bool
+assert_pending_valid(const struct intel_engine_execlists *execlists,
+                    const char *msg)
+{
+       struct i915_request * const *port, *rq;
+       struct intel_context *ce = NULL;
+
+       trace_ports(execlists, msg, execlists->pending);
+
+       if (execlists->pending[execlists_num_ports(execlists)])
+               return false;
+
+       for (port = execlists->pending; (rq = *port); port++) {
+               if (ce == rq->hw_context)
+                       return false;
+
+               ce = rq->hw_context;
+               if (i915_request_completed(rq))
+                       continue;
+
+               if (i915_active_is_idle(&ce->active))
+                       return false;
+
+               if (!i915_vma_is_pinned(ce->state))
+                       return false;
+       }
+
+       return ce;
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists *execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
        unsigned int n;
 
+       GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
+
        /*
         * We can skip acquiring intel_runtime_pm_get() here as it was taken
         * on our behalf by the request (see i915_gem_mark_busy()) and it will
@@ -613,38 +690,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
         * of elsq entries, keep this in mind before changing the loop below.
         */
        for (n = execlists_num_ports(execlists); n--; ) {
-               struct i915_request *rq;
-               unsigned int count;
-               u64 desc;
+               struct i915_request *rq = execlists->pending[n];
 
-               rq = port_unpack(&port[n], &count);
-               if (rq) {
-                       GEM_BUG_ON(count > !n);
-                       if (!count++)
-                               execlists_context_schedule_in(rq);
-                       port_set(&port[n], port_pack(rq, count));
-                       desc = execlists_update_context(rq);
-                       GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
-
-                       GEM_TRACE("%s in[%d]:  ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-                                 engine->name, n,
-                                 port[n].context_id, count,
-                                 rq->fence.context, rq->fence.seqno,
-                                 hwsp_seqno(rq),
-                                 rq_prio(rq));
-               } else {
-                       GEM_BUG_ON(!n);
-                       desc = 0;
-               }
-
-               write_desc(execlists, desc, n);
+               write_desc(execlists,
+                          rq ? execlists_update_context(rq) : 0,
+                          n);
        }
 
        /* we need to manually load the submit queue */
        if (execlists->ctrl_reg)
                writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-       execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
 static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -668,6 +723,7 @@ static bool can_merge_ctx(const struct intel_context *prev,
 static bool can_merge_rq(const struct i915_request *prev,
                         const struct i915_request *next)
 {
+       GEM_BUG_ON(prev == next);
        GEM_BUG_ON(!assert_priority_queue(prev, next));
 
        if (!can_merge_ctx(prev->hw_context, next->hw_context))
@@ -676,58 +732,6 @@ static bool can_merge_rq(const struct i915_request *prev,
        return true;
 }
 
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
-       GEM_BUG_ON(rq == port_request(port));
-
-       if (port_isset(port))
-               i915_request_put(port_request(port));
-
-       port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
-       struct intel_engine_execlists *execlists = &engine->execlists;
-       struct intel_context *ce = engine->preempt_context;
-       unsigned int n;
-
-       GEM_BUG_ON(execlists->preempt_complete_status !=
-                  upper_32_bits(ce->lrc_desc));
-
-       /*
-        * Switch to our empty preempt context so
-        * the state of the GPU is known (idle).
-        */
-       GEM_TRACE("%s\n", engine->name);
-       for (n = execlists_num_ports(execlists); --n; )
-               write_desc(execlists, 0, n);
-
-       write_desc(execlists, ce->lrc_desc, n);
-
-       /* we need to manually load the submit queue */
-       if (execlists->ctrl_reg)
-               writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-       execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
-       execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
-       (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
-
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
-       GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
-       if (inject_preempt_hang(execlists))
-               return;
-
-       execlists_cancel_port_requests(execlists);
-       __unwind_incomplete_requests(container_of(execlists,
-                                                 struct intel_engine_cs,
-                                                 execlists));
-}
-
 static void virtual_update_register_offsets(u32 *regs,
                                            struct intel_engine_cs *engine)
 {
@@ -792,7 +796,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
         * we reuse the register offsets). This is a very small
         * hystersis on the greedy seelction algorithm.
         */
-       inflight = READ_ONCE(ve->context.inflight);
+       inflight = intel_context_inflight(&ve->context);
        if (inflight && inflight != engine)
                return false;
 
@@ -815,13 +819,23 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
        spin_unlock(&old->breadcrumbs.irq_lock);
 }
 
+static struct i915_request *
+last_active(const struct intel_engine_execlists *execlists)
+{
+       struct i915_request * const *last = execlists->active;
+
+       while (*last && i915_request_completed(*last))
+               last++;
+
+       return *last;
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
-       const struct execlist_port * const last_port =
-               &execlists->port[execlists->port_mask];
-       struct i915_request *last = port_request(port);
+       struct i915_request **port = execlists->pending;
+       struct i915_request ** const last_port = port + execlists->port_mask;
+       struct i915_request *last;
        struct rb_node *rb;
        bool submit = false;
 
@@ -867,65 +881,72 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                break;
        }
 
+       /*
+        * If the queue is higher priority than the last
+        * request in the currently active context, submit afresh.
+        * We will resubmit again afterwards in case we need to split
+        * the active context to interject the preemption request,
+        * i.e. we will retrigger preemption following the ack in case
+        * of trouble.
+        */
+       last = last_active(execlists);
        if (last) {
-               /*
-                * Don't resubmit or switch until all outstanding
-                * preemptions (lite-restore) are seen. Then we
-                * know the next preemption status we see corresponds
-                * to this ELSP update.
-                */
-               GEM_BUG_ON(!execlists_is_active(execlists,
-                                               EXECLISTS_ACTIVE_USER));
-               GEM_BUG_ON(!port_count(&port[0]));
-
-               /*
-                * If we write to ELSP a second time before the HW has had
-                * a chance to respond to the previous write, we can confuse
-                * the HW and hit "undefined behaviour". After writing to ELSP,
-                * we must then wait until we see a context-switch event from
-                * the HW to indicate that it has had a chance to respond.
-                */
-               if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-                       return;
-
                if (need_preempt(engine, last, rb)) {
-                       inject_preempt_context(engine);
-                       return;
-               }
+                       GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n",
+                                 engine->name,
+                                 last->fence.context,
+                                 last->fence.seqno,
+                                 last->sched.attr.priority,
+                                 execlists->queue_priority_hint);
+                       /*
+                        * Don't let the RING_HEAD advance past the breadcrumb
+                        * as we unwind (and until we resubmit) so that we do
+                        * not accidentally tell it to go backwards.
+                        */
+                       ring_set_paused(engine, 1);
 
-               /*
-                * In theory, we could coalesce more requests onto
-                * the second port (the first port is active, with
-                * no preemptions pending). However, that means we
-                * then have to deal with the possible lite-restore
-                * of the second port (as we submit the ELSP, there
-                * may be a context-switch) but also we may complete
-                * the resubmission before the context-switch. Ergo,
-                * coalescing onto the second port will cause a
-                * preemption event, but we cannot predict whether
-                * that will affect port[0] or port[1].
-                *
-                * If the second port is already active, we can wait
-                * until the next context-switch before contemplating
-                * new requests. The GPU will be busy and we should be
-                * able to resubmit the new ELSP before it idles,
-                * avoiding pipeline bubbles (momentary pauses where
-                * the driver is unable to keep up the supply of new
-                * work). However, we have to double check that the
-                * priorities of the ports haven't been switch.
-                */
-               if (port_count(&port[1]))
-                       return;
+                       /*
+                        * Note that we have not stopped the GPU at this point:
+                        * the incomplete requests we unwind are still inflight,
+                        * so by the time the preemption itself completes, some
+                        * of the requests we have just unwound may already
+                        * have completed!
+                        */
+                       __unwind_incomplete_requests(engine);
 
-               /*
-                * WaIdleLiteRestore:bdw,skl
-                * Apply the wa NOOPs to prevent
-                * ring:HEAD == rq:TAIL as we resubmit the
-                * request. See gen8_emit_fini_breadcrumb() for
-                * where we prepare the padding after the
-                * end of the request.
-                */
-               last->tail = last->wa_tail;
+                       /*
+                        * If we need to return to the preempted context, we
+                        * need to skip the lite-restore and force it to
+                        * reload the RING_TAIL. Otherwise, the HW has a
+                        * tendency to ignore us rewinding the TAIL to the
+                        * end of an earlier request.
+                        */
+                       last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+                       last = NULL;
+               } else {
+                       /*
+                        * Otherwise if we already have a request pending
+                        * for execution after the current one, we can
+                        * just wait until the next CS event before
+                        * queuing more. In either case we will force a
+                        * lite-restore preemption event, but if we wait
+                        * we hopefully coalesce several updates into a single
+                        * submission.
+                        */
+                       if (!list_is_last(&last->sched.link,
+                                         &engine->active.requests))
+                               return;
+
+                       /*
+                        * WaIdleLiteRestore:bdw,skl
+                        * Apply the wa NOOPs to prevent
+                        * ring:HEAD == rq:TAIL as we resubmit the
+                        * request. See gen8_emit_fini_breadcrumb() for
+                        * where we prepare the padding after the
+                        * end of the request.
+                        */
+                       last->tail = last->wa_tail;
+               }
        }
 
        while (rb) { /* XXX virtual is always taking precedence */
@@ -955,9 +976,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                continue;
                        }
 
+                       if (i915_request_completed(rq)) {
+                               ve->request = NULL;
+                               ve->base.execlists.queue_priority_hint = INT_MIN;
+                               rb_erase_cached(rb, &execlists->virtual);
+                               RB_CLEAR_NODE(rb);
+
+                               rq->engine = engine;
+                               __i915_request_submit(rq);
+
+                               spin_unlock(&ve->base.active.lock);
+
+                               rb = rb_first_cached(&execlists->virtual);
+                               continue;
+                       }
+
                        if (last && !can_merge_rq(last, rq)) {
                                spin_unlock(&ve->base.active.lock);
-                               return; /* leave this rq for another engine */
+                               return; /* leave this for another engine */
                        }
 
                        GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
@@ -1006,9 +1042,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                        }
 
                        __i915_request_submit(rq);
-                       trace_i915_request_in(rq, port_index(port, execlists));
-                       submit = true;
-                       last = rq;
+                       if (!i915_request_completed(rq)) {
+                               submit = true;
+                               last = rq;
+                       }
                }
 
                spin_unlock(&ve->base.active.lock);
@@ -1021,6 +1058,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                int i;
 
                priolist_for_each_request_consume(rq, rn, p, i) {
+                       if (i915_request_completed(rq))
+                               goto skip;
+
                        /*
                         * Can we combine this request with the current port?
                         * It has to be the same context/ringbuffer and not
@@ -1060,19 +1100,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                    ctx_single_port_submission(rq->hw_context))
                                        goto done;
 
-
-                               if (submit)
-                                       port_assign(port, last);
+                               *port = execlists_schedule_in(last, port - execlists->pending);
                                port++;
-
-                               GEM_BUG_ON(port_isset(port));
                        }
 
-                       __i915_request_submit(rq);
-                       trace_i915_request_in(rq, port_index(port, execlists));
-
                        last = rq;
                        submit = true;
+skip:
+                       __i915_request_submit(rq);
                }
 
                rb_erase_cached(&p->node, &execlists->queue);
@@ -1097,54 +1132,30 @@ done:
         * interrupt for secondary ports).
         */
        execlists->queue_priority_hint = queue_prio(execlists);
+       GEM_TRACE("%s: queue_priority_hint:%d, submit:%s\n",
+                 engine->name, execlists->queue_priority_hint,
+                 yesno(submit));
 
        if (submit) {
-               port_assign(port, last);
+               *port = execlists_schedule_in(last, port - execlists->pending);
+               memset(port + 1, 0, (last_port - port) * sizeof(*port));
                execlists_submit_ports(engine);
        }
-
-       /* We must always keep the beast fed if we have work piled up */
-       GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-                  !port_isset(execlists->port));
-
-       /* Re-evaluate the executing context setup after each preemptive kick */
-       if (last)
-               execlists_user_begin(execlists, execlists->port);
-
-       /* If the engine is now idle, so should be the flag; and vice versa. */
-       GEM_BUG_ON(execlists_is_active(&engine->execlists,
-                                      EXECLISTS_ACTIVE_USER) ==
-                  !port_isset(engine->execlists.port));
 }
 
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
-       struct execlist_port *port = execlists->port;
-       unsigned int num_ports = execlists_num_ports(execlists);
-
-       while (num_ports-- && port_isset(port)) {
-               struct i915_request *rq = port_request(port);
+       struct i915_request * const *port, *rq; /* array cursor, current entry */
 
-               GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
-                         rq->engine->name,
-                         (unsigned int)(port - execlists->port),
-                         rq->fence.context, rq->fence.seqno,
-                         hwsp_seqno(rq));
+       for (port = execlists->pending; (rq = *port); port++) /* until a NULL entry */
+               execlists_schedule_out(rq);
+       memset(execlists->pending, 0, sizeof(execlists->pending)); /* empty pending[] */
 
-               GEM_BUG_ON(!execlists->active);
-               execlists_context_schedule_out(rq,
-                                              i915_request_completed(rq) ?
-                                              INTEL_CONTEXT_SCHEDULE_OUT :
-                                              INTEL_CONTEXT_SCHEDULE_PREEMPTED);
-
-               i915_request_put(rq);
-
-               memset(port, 0, sizeof(*port));
-               port++;
-       }
-
-       execlists_clear_all_active(execlists);
+       for (port = execlists->active; (rq = *port); port++) /* same from ->active on */
+               execlists_schedule_out(rq);
+       execlists->active = /* memset() returns its first argument: &inflight[0] */
+               memset(execlists->inflight, 0, sizeof(execlists->inflight));
 }
 
 static inline void
@@ -1163,7 +1174,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
 static void process_csb(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
        const u32 * const buf = execlists->csb_status;
        const u8 num_entries = execlists->csb_size;
        u8 head, tail;
@@ -1198,9 +1208,7 @@ static void process_csb(struct intel_engine_cs *engine)
        rmb();
 
        do {
-               struct i915_request *rq;
                unsigned int status;
-               unsigned int count;
 
                if (++head == num_entries)
                        head = 0;
@@ -1223,68 +1231,38 @@ static void process_csb(struct intel_engine_cs *engine)
                 * status notifier.
                 */
 
-               GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+               GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
                          engine->name, head,
-                         buf[2 * head + 0], buf[2 * head + 1],
-                         execlists->active);
+                         buf[2 * head + 0], buf[2 * head + 1]);
 
                status = buf[2 * head];
-               if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                             GEN8_CTX_STATUS_PREEMPTED))
-                       execlists_set_active(execlists,
-                                            EXECLISTS_ACTIVE_HWACK);
-               if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-                       execlists_clear_active(execlists,
-                                              EXECLISTS_ACTIVE_HWACK);
-
-               if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                       continue;
-
-               /* We should never get a COMPLETED | IDLE_ACTIVE! */
-               GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
-
-               if (status & GEN8_CTX_STATUS_COMPLETE &&
-                   buf[2*head + 1] == execlists->preempt_complete_status) {
-                       GEM_TRACE("%s preempt-idle\n", engine->name);
-                       complete_preempt_context(execlists);
-                       continue;
-               }
-
-               if (status & GEN8_CTX_STATUS_PREEMPTED &&
-                   execlists_is_active(execlists,
-                                       EXECLISTS_ACTIVE_PREEMPT))
-                       continue;
-
-               GEM_BUG_ON(!execlists_is_active(execlists,
-                                               EXECLISTS_ACTIVE_USER));
-
-               rq = port_unpack(port, &count);
-               GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-                         engine->name,
-                         port->context_id, count,
-                         rq ? rq->fence.context : 0,
-                         rq ? rq->fence.seqno : 0,
-                         rq ? hwsp_seqno(rq) : 0,
-                         rq ? rq_prio(rq) : 0);
-
-               /* Check the context/desc id for this event matches */
-               GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
-               GEM_BUG_ON(count == 0);
-               if (--count == 0) {
-                       /*
-                        * On the final event corresponding to the
-                        * submission of this context, we expect either
-                        * an element-switch event or a completion
-                        * event (and on completion, the active-idle
-                        * marker). No more preemptions, lite-restore
-                        * or otherwise.
-                        */
-                       GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-                       GEM_BUG_ON(port_isset(&port[1]) &&
-                                  !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-                       GEM_BUG_ON(!port_isset(&port[1]) &&
-                                  !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+               if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
+                       GEM_BUG_ON(*execlists->active);
+promote:
+                       GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+                       execlists->active =
+                               memcpy(execlists->inflight,
+                                      execlists->pending,
+                                      execlists_num_ports(execlists) *
+                                      sizeof(*execlists->pending));
+                       execlists->pending[0] = NULL;
+
+                       if (!inject_preempt_hang(execlists))
+                               ring_set_paused(engine, 0);
+               } else if (status & GEN8_CTX_STATUS_PREEMPTED) {
+                       struct i915_request * const *port = execlists->active;
+
+                       trace_ports(execlists, "preempted", execlists->active);
+
+                       while (*port)
+                               execlists_schedule_out(*port++);
+
+                       goto promote;
+               } else if (*execlists->active) {
+                       struct i915_request *rq = *execlists->active++;
+
+                       trace_ports(execlists, "completed",
+                                   execlists->active - 1);
 
                        /*
                         * We rely on the hardware being strongly
@@ -1293,21 +1271,10 @@ static void process_csb(struct intel_engine_cs *engine)
                         * user interrupt and CSB is processed.
                         */
                        GEM_BUG_ON(!i915_request_completed(rq));
+                       execlists_schedule_out(rq);
 
-                       execlists_context_schedule_out(rq,
-                                                      INTEL_CONTEXT_SCHEDULE_OUT);
-                       i915_request_put(rq);
-
-                       GEM_TRACE("%s completed ctx=%d\n",
-                                 engine->name, port->context_id);
-
-                       port = execlists_port_complete(execlists, port);
-                       if (port_isset(port))
-                               execlists_user_begin(execlists, port);
-                       else
-                               execlists_user_end(execlists);
-               } else {
-                       port_set(port, port_pack(rq, count));
+                       GEM_BUG_ON(execlists->active - execlists->inflight >
+                                  execlists_num_ports(execlists));
                }
        } while (head != tail);
 
@@ -1332,7 +1299,7 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
        lockdep_assert_held(&engine->active.lock);
 
        process_csb(engine);
-       if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+       if (!engine->execlists.pending[0])
                execlists_dequeue(engine);
 }
 
@@ -1345,11 +1312,6 @@ static void execlists_submission_tasklet(unsigned long data)
        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        unsigned long flags;
 
-       GEM_TRACE("%s awake?=%d, active=%x\n",
-                 engine->name,
-                 !!intel_wakeref_active(&engine->wakeref),
-                 engine->execlists.active);
-
        spin_lock_irqsave(&engine->active.lock, flags);
        __execlists_submission_tasklet(engine);
        spin_unlock_irqrestore(&engine->active.lock, flags);
@@ -1376,12 +1338,16 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
                tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void submit_queue(struct intel_engine_cs *engine,
+                        const struct i915_request *rq)
 {
-       if (prio > engine->execlists.queue_priority_hint) {
-               engine->execlists.queue_priority_hint = prio;
-               __submit_queue_imm(engine);
-       }
+       struct intel_engine_execlists *execlists = &engine->execlists;
+
+       if (rq_prio(rq) <= execlists->queue_priority_hint) /* rq does not outrank the hint */
+               return;
+
+       execlists->queue_priority_hint = rq_prio(rq); /* raise the hint to rq's priority */
+       __submit_queue_imm(engine); /* only reached when the hint was raised */
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1397,7 +1363,7 @@ static void execlists_submit_request(struct i915_request *request)
        GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
        GEM_BUG_ON(list_empty(&request->sched.link));
 
-       submit_queue(engine, rq_prio(request));
+       submit_queue(engine, request);
 
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -2048,27 +2014,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
-static bool lrc_regs_ok(const struct i915_request *rq)
-{
-       const struct intel_ring *ring = rq->ring;
-       const u32 *regs = rq->hw_context->lrc_reg_state;
-
-       /* Quick spot check for the common signs of context corruption */
-
-       if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
-           (RING_CTL_SIZE(ring->size) | RING_VALID))
-               return false;
-
-       if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
-               return false;
-
-       return true;
-}
-
-static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+static void reset_csb_pointers(struct intel_engine_cs *engine)
 {
+       struct intel_engine_execlists * const execlists = &engine->execlists;
        const unsigned int reset_value = execlists->csb_size - 1;
 
+       ring_set_paused(engine, 0);
+
        /*
         * After a reset, the HW starts writing into CSB entry [0]. We
         * therefore have to set our HEAD pointer back one entry so that
@@ -2115,18 +2067,21 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
        process_csb(engine); /* drain preemption events */
 
        /* Following the reset, we need to reload the CSB read/write pointers */
-       reset_csb_pointers(&engine->execlists);
+       reset_csb_pointers(engine);
 
        /*
         * Save the currently executing context, even if we completed
         * its request, it was still running at the time of the
         * reset and will have been clobbered.
         */
-       if (!port_isset(execlists->port))
-               goto out_clear;
+       rq = execlists_active(execlists);
+       if (!rq)
+               return;
 
-       rq = port_request(execlists->port);
        ce = rq->hw_context;
+       GEM_BUG_ON(i915_active_is_idle(&ce->active));
+       GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
+       rq = active_request(rq);
 
        /*
         * Catch up with any missed context-switch interrupts.
@@ -2139,9 +2094,12 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
         */
        execlists_cancel_port_requests(execlists);
 
-       rq = active_request(rq);
-       if (!rq)
+       if (!rq) {
+               ce->ring->head = ce->ring->tail;
                goto out_replay;
+       }
+
+       ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
 
        /*
         * If this request hasn't started yet, e.g. it is waiting on a
@@ -2155,7 +2113,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
         * Otherwise, if we have not started yet, the request should replay
         * perfectly and we do not need to flag the result as being erroneous.
         */
-       if (!i915_request_started(rq) && lrc_regs_ok(rq))
+       if (!i915_request_started(rq))
                goto out_replay;
 
        /*
@@ -2170,7 +2128,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
         * image back to the expected values to skip over the guilty request.
         */
        i915_reset_request(rq, stalled);
-       if (!stalled && lrc_regs_ok(rq))
+       if (!stalled)
                goto out_replay;
 
        /*
@@ -2190,17 +2148,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
        execlists_init_reg_state(regs, ce, engine, ce->ring);
 
 out_replay:
-       /* Rerun the request; its payload has been neutered (if guilty). */
-       ce->ring->head =
-               rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
+       GEM_TRACE("%s replay {head:%04x, tail:%04x}\n",
+                 engine->name, ce->ring->head, ce->ring->tail);
        intel_ring_update_space(ce->ring);
        __execlists_update_reg_state(ce, engine);
 
        /* Push back any incomplete requests for replay after the reset. */
        __unwind_incomplete_requests(engine);
-
-out_clear:
-       execlists_clear_all_active(execlists);
 }
 
 static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
@@ -2296,7 +2250,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
        execlists->queue_priority_hint = INT_MIN;
        execlists->queue = RB_ROOT_CACHED;
-       GEM_BUG_ON(port_isset(execlists->port));
 
        GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
        execlists->tasklet.func = nop_submission_tasklet;
@@ -2514,15 +2467,29 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
        return cs;
 }
 
+static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{ /* Writes a four-dword MI_SEMAPHORE_WAIT at cs; returns cs advanced past it. */
+       *cs++ = MI_SEMAPHORE_WAIT |
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_SAD_EQ_SDD;
+       *cs++ = 0; /* presumably the data dword compared by SAD_EQ_SDD - confirm */
+       *cs++ = intel_hws_preempt_address(request->engine); /* looked up per engine */
+       *cs++ = 0; /* presumably the upper address dword - confirm */
+
+       return cs;
+}
+
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
        cs = gen8_emit_ggtt_write(cs,
                                  request->fence.seqno,
                                  request->timeline->hwsp_offset,
                                  0);
-
        *cs++ = MI_USER_INTERRUPT;
+
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+       cs = emit_preempt_busywait(request, cs);
 
        request->tail = intel_ring_offset(request, cs);
        assert_ring_tail_valid(request->ring, request->tail);
@@ -2543,9 +2510,10 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
                                    PIPE_CONTROL_FLUSH_ENABLE |
                                    PIPE_CONTROL_CS_STALL,
                                    0);
-
        *cs++ = MI_USER_INTERRUPT;
+
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+       cs = emit_preempt_busywait(request, cs);
 
        request->tail = intel_ring_offset(request, cs);
        assert_ring_tail_valid(request->ring, request->tail);
@@ -2594,8 +2562,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
        if (!intel_vgpu_active(engine->i915))
                engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-       if (engine->preempt_context &&
-           HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+       if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
                engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
@@ -2718,11 +2685,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
                        i915_mmio_reg_offset(RING_ELSP(base));
        }
 
-       execlists->preempt_complete_status = ~0u;
-       if (engine->preempt_context)
-               execlists->preempt_complete_status =
-                       upper_32_bits(engine->preempt_context->lrc_desc);
-
        execlists->csb_status =
                &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
@@ -2734,7 +2696,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
        else
                execlists->csb_size = GEN11_CSB_ENTRIES;
 
-       reset_csb_pointers(execlists);
+       reset_csb_pointers(engine);
 
        return 0;
 }
@@ -2917,11 +2879,6 @@ populate_lr_context(struct intel_context *ce,
        if (!engine->default_state)
                regs[CTX_CONTEXT_CONTROL + 1] |=
                        _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-       if (ce->gem_context == engine->i915->preempt_context &&
-           INTEL_GEN(engine->i915) < 11)
-               regs[CTX_CONTEXT_CONTROL + 1] |=
-                       _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-                                          CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
 
        ret = 0;
 err_unpin_ctx:
index b7e9fddef270a27811d352368789e4d79cdcbc56..a497cf7acb6aa2eb668dd68c8c4867df20809bb1 100644 (file)
@@ -1248,10 +1248,10 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
        }
 }
 
-static void record_request(struct i915_request *request,
+static void record_request(const struct i915_request *request,
                           struct drm_i915_error_request *erq)
 {
-       struct i915_gem_context *ctx = request->gem_context;
+       const struct i915_gem_context *ctx = request->gem_context;
 
        erq->flags = request->fence.flags;
        erq->context = request->fence.context;
@@ -1315,20 +1315,15 @@ static void engine_record_requests(struct intel_engine_cs *engine,
        ee->num_requests = count;
 }
 
-static void error_record_engine_execlists(struct intel_engine_cs *engine,
+static void error_record_engine_execlists(const struct intel_engine_cs *engine,
                                           struct drm_i915_error_engine *ee)
 {
        const struct intel_engine_execlists * const execlists = &engine->execlists;
-       unsigned int n;
+       struct i915_request * const *port = execlists->active; /* walk starts at ->active */
+       unsigned int n = 0; /* number of records written so far */
 
-       for (n = 0; n < execlists_num_ports(execlists); n++) {
-               struct i915_request *rq = port_request(&execlists->port[n]);
-
-               if (!rq)
-                       break;
-
-               record_request(rq, &ee->execlist[n]);
-       }
+       while (*port) /* NOTE(review): ends only at a NULL entry, no num_ports bound */
+               record_request(*port++, &ee->execlist[n++]); /* confirm execlist[] is big enough */
 
        ee->num_ports = n;
 }
index 7083e6ab92c5cbca8cc51096a1dbdb8f9a7abb8e..0c99694faab7f3ca02158d76621c1bd1cde36366 100644 (file)
@@ -276,6 +276,12 @@ static bool i915_request_retire(struct i915_request *rq)
 
        local_irq_disable();
 
+       /*
+        * We only loosely track inflight requests across preemption,
+        * and so we may find ourselves attempting to retire a _completed_
+        * request that we have removed from the HW and put back on a run
+        * queue.
+        */
        spin_lock(&rq->engine->active.lock);
        list_del(&rq->sched.link);
        spin_unlock(&rq->engine->active.lock);
index edbbdfec24ab2c24714fc423469d23edf709e457..bebc1e9b4a5e1fb52755b8ee7efdfb71c493ec5d 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gt/intel_context_types.h"
 #include "gt/intel_engine_types.h"
 
 #include "i915_gem.h"
index 2e9b38bdc33c4490cec5e21138544327550e3fc5..b1ba3e65cd529952f405884b8fff2726da707a23 100644 (file)
@@ -179,8 +179,7 @@ static inline int rq_prio(const struct i915_request *rq)
 
 static void kick_submission(struct intel_engine_cs *engine, int prio)
 {
-       const struct i915_request *inflight =
-               port_request(engine->execlists.port);
+       const struct i915_request *inflight = *engine->execlists.active;
 
        /*
         * If we are already the currently executing context, don't
index 2987219a6300b922f0e4358d505aac6ce9cdd7e7..4920ff9aba62abcbc8f5ee61a0c485b1b9094eda 100644 (file)
@@ -131,6 +131,18 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
        ((typeof(ptr))((unsigned long)(ptr) | __bits));                 \
 })
 
+#define ptr_count_dec(p_ptr) do {                                      \
+       typeof(p_ptr) __p = (p_ptr);                                    \
+       unsigned long __v = (unsigned long)(*__p);                      \
+       *__p = (typeof(*p_ptr))(--__v);                                 \
+} while (0)
+
+#define ptr_count_inc(p_ptr) do {                                      \
+       typeof(p_ptr) __p = (p_ptr);                                    \
+       unsigned long __v = (unsigned long)(*__p);                      \
+       *__p = (typeof(*p_ptr))(++__v);                                 \
+} while (0)
+
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
 #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
index db531ebc7704eb1de1753d8ce50581ec3ceaddc4..12c22359fdacc9dd0d393eb8ba672efa87165438 100644 (file)
 #include "intel_guc_submission.h"
 #include "i915_drv.h"
 
-#define GUC_PREEMPT_FINISHED           0x1
+enum {
+       GUC_PREEMPT_NONE = 0,
+       GUC_PREEMPT_INPROGRESS,
+       GUC_PREEMPT_FINISHED,
+};
 #define GUC_PREEMPT_BREADCRUMB_DWORDS  0x8
 #define GUC_PREEMPT_BREADCRUMB_BYTES   \
        (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
@@ -537,15 +541,11 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
        u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
        u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
-       spin_lock(&client->wq_lock);
-
        guc_wq_item_append(client, engine->guc_id, ctx_desc,
                           ring_tail, rq->fence.seqno);
        guc_ring_doorbell(client);
 
        client->submissions[engine->id] += 1;
-
-       spin_unlock(&client->wq_lock);
 }
 
 /*
@@ -631,8 +631,9 @@ static void inject_preempt_context(struct work_struct *work)
        data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
 
        if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
-               execlists_clear_active(&engine->execlists,
-                                      EXECLISTS_ACTIVE_PREEMPT);
+               intel_write_status_page(engine,
+                                       I915_GEM_HWS_PREEMPT,
+                                       GUC_PREEMPT_NONE);
                tasklet_schedule(&engine->execlists.tasklet);
        }
 
@@ -672,8 +673,6 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists *execlists = &engine->execlists;
 
-       GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
        if (inject_preempt_hang(execlists))
                return;
 
@@ -681,89 +680,90 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
        execlists_unwind_incomplete_requests(execlists);
 
        wait_for_guc_preempt_report(engine);
-       intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
+       intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, GUC_PREEMPT_NONE);
 }
 
-/**
- * guc_submit() - Submit commands through GuC
- * @engine: engine associated with the commands
- *
- * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
- */
-static void guc_submit(struct intel_engine_cs *engine)
+static void guc_submit(struct intel_engine_cs *engine,
+                      struct i915_request **out,
+                      struct i915_request **end)
 {
        struct intel_guc *guc = &engine->i915->guc;
-       struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
-       unsigned int n;
+       struct intel_guc_client *client = guc->execbuf_client;
 
-       for (n = 0; n < execlists_num_ports(execlists); n++) {
-               struct i915_request *rq;
-               unsigned int count;
+       spin_lock(&client->wq_lock);
 
-               rq = port_unpack(&port[n], &count);
-               if (rq && count == 0) {
-                       port_set(&port[n], port_pack(rq, ++count));
+       do {
+               struct i915_request *rq = *out++;
 
-                       flush_ggtt_writes(rq->ring->vma);
+               flush_ggtt_writes(rq->ring->vma);
+               guc_add_request(guc, rq);
+       } while (out != end);
 
-                       guc_add_request(guc, rq);
-               }
-       }
+       spin_unlock(&client->wq_lock);
 }
 
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
+static inline int rq_prio(const struct i915_request *rq)
 {
-       GEM_BUG_ON(port_isset(port));
-
-       port_set(port, i915_request_get(rq));
+       return rq->sched.attr.priority | __NO_PREEMPTION;
 }
 
-static inline int rq_prio(const struct i915_request *rq)
+static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 {
-       return rq->sched.attr.priority;
+       trace_i915_request_in(rq, idx);
+
+       if (!rq->hw_context->inflight)
+               rq->hw_context->inflight = rq->engine;
+       intel_context_inflight_inc(rq->hw_context);
+
+       return i915_request_get(rq);
 }
 
-static inline int port_prio(const struct execlist_port *port)
+static void schedule_out(struct i915_request *rq)
 {
-       return rq_prio(port_request(port)) | __NO_PREEMPTION;
+       trace_i915_request_out(rq);
+
+       intel_context_inflight_dec(rq->hw_context);
+       if (!intel_context_inflight_count(rq->hw_context))
+               rq->hw_context->inflight = NULL;
+
+       i915_request_put(rq);
 }
 
-static bool __guc_dequeue(struct intel_engine_cs *engine)
+static void __guc_dequeue(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
-       struct i915_request *last = NULL;
-       const struct execlist_port * const last_port =
-               &execlists->port[execlists->port_mask];
+       struct i915_request **first = execlists->inflight;
+       struct i915_request ** const last_port = first + execlists->port_mask;
+       struct i915_request *last = first[0];
+       struct i915_request **port;
        bool submit = false;
        struct rb_node *rb;
 
        lockdep_assert_held(&engine->active.lock);
 
-       if (port_isset(port)) {
+       if (last) {
                if (intel_engine_has_preemption(engine)) {
                        struct guc_preempt_work *preempt_work =
                                &engine->i915->guc.preempt_work[engine->id];
                        int prio = execlists->queue_priority_hint;
 
-                       if (i915_scheduler_need_preempt(prio,
-                                                       port_prio(port))) {
-                               execlists_set_active(execlists,
-                                                    EXECLISTS_ACTIVE_PREEMPT);
+                       if (i915_scheduler_need_preempt(prio, rq_prio(last))) {
+                               intel_write_status_page(engine,
+                                                       I915_GEM_HWS_PREEMPT,
+                                                       GUC_PREEMPT_INPROGRESS);
                                queue_work(engine->i915->guc.preempt_wq,
                                           &preempt_work->work);
-                               return false;
+                               return;
                        }
                }
 
-               port++;
-               if (port_isset(port))
-                       return false;
+               if (*++first)
+                       return;
+
+               last = NULL;
        }
-       GEM_BUG_ON(port_isset(port));
 
+       port = first;
        while ((rb = rb_first_cached(&execlists->queue))) {
                struct i915_priolist *p = to_priolist(rb);
                struct i915_request *rq, *rn;
@@ -774,18 +774,15 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
                                if (port == last_port)
                                        goto done;
 
-                               if (submit)
-                                       port_assign(port, last);
+                               *port = schedule_in(last,
+                                                   port - execlists->inflight);
                                port++;
                        }
 
                        list_del_init(&rq->sched.link);
-
                        __i915_request_submit(rq);
-                       trace_i915_request_in(rq, port_index(port, execlists));
-
-                       last = rq;
                        submit = true;
+                       last = rq;
                }
 
                rb_erase_cached(&p->node, &execlists->queue);
@@ -794,58 +791,41 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 done:
        execlists->queue_priority_hint =
                rb ? to_priolist(rb)->priority : INT_MIN;
-       if (submit)
-               port_assign(port, last);
-       if (last)
-               execlists_user_begin(execlists, execlists->port);
-
-       /* We must always keep the beast fed if we have work piled up */
-       GEM_BUG_ON(port_isset(execlists->port) &&
-                  !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-       GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-                  !port_isset(execlists->port));
-
-       return submit;
-}
-
-static void guc_dequeue(struct intel_engine_cs *engine)
-{
-       if (__guc_dequeue(engine))
-               guc_submit(engine);
+       if (submit) {
+               *port = schedule_in(last, port - execlists->inflight);
+               *++port = NULL;
+               guc_submit(engine, first, port);
+       }
+       execlists->active = execlists->inflight;
 }
 
 static void guc_submission_tasklet(unsigned long data)
 {
        struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct execlist_port *port = execlists->port;
-       struct i915_request *rq;
+       struct i915_request **port, *rq;
        unsigned long flags;
 
        spin_lock_irqsave(&engine->active.lock, flags);
 
-       rq = port_request(port);
-       while (rq && i915_request_completed(rq)) {
-               trace_i915_request_out(rq);
-               i915_request_put(rq);
+       for (port = execlists->inflight; (rq = *port); port++) {
+               if (!i915_request_completed(rq))
+                       break;
 
-               port = execlists_port_complete(execlists, port);
-               if (port_isset(port)) {
-                       execlists_user_begin(execlists, port);
-                       rq = port_request(port);
-               } else {
-                       execlists_user_end(execlists);
-                       rq = NULL;
-               }
+               schedule_out(rq);
+       }
+       if (port != execlists->inflight) {
+               int idx = port - execlists->inflight;
+               int rem = ARRAY_SIZE(execlists->inflight) - idx;
+               memmove(execlists->inflight, port, rem * sizeof(*port));
        }
 
-       if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
-           intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
+       if (intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
            GUC_PREEMPT_FINISHED)
                complete_preempt_context(engine);
 
-       if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
-               guc_dequeue(engine);
+       if (!intel_read_status_page(engine, I915_GEM_HWS_PREEMPT))
+               __guc_dequeue(engine);
 
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -959,7 +939,6 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 
        execlists->queue_priority_hint = INT_MIN;
        execlists->queue = RB_ROOT_CACHED;
-       GEM_BUG_ON(port_isset(execlists->port));
 
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -1422,7 +1401,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
         * and it is guaranteed that it will remove the work item from the
         * queue before our request is completed.
         */
-       BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) *
+       BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
                     sizeof(struct guc_wq_item) *
                     I915_NUM_ENGINES > GUC_WQ_SIZE);
 
index 298bb7116c5110ff285635fe6f8a89e9097a354c..1a5b9e284ca96ab443ea6ef7bdebdc713cc91c3c 100644 (file)
@@ -366,13 +366,15 @@ static int __igt_breadcrumbs_smoketest(void *arg)
 
                if (!wait_event_timeout(wait->wait,
                                        i915_sw_fence_done(wait),
-                                       HZ / 2)) {
+                                       5 * HZ)) {
                        struct i915_request *rq = requests[count - 1];
 
-                       pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
-                              count,
+                       pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
+                              atomic_read(&wait->pending), count,
                               rq->fence.context, rq->fence.seqno,
                               t->engine->name);
+                       GEM_TRACE_DUMP();
+
                        i915_gem_set_wedged(t->engine->i915);
                        GEM_BUG_ON(!i915_request_completed(rq));
                        i915_sw_fence_wait(wait);