drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
author    Chris Wilson <chris@chris-wilson.co.uk>
          Fri, 28 Dec 2018 14:07:35 +0000 (14:07 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
          Fri, 28 Dec 2018 14:43:27 +0000 (14:43 +0000)
The writing is on the wall for the single execution queue along each
engine, and as a consequence we will no longer be able to track
dependencies along the HW queue itself, i.e. we will not be able to use
HW semaphores on gen7, as they use a global set of registers (and,
unlike gen8+, we cannot effectively target memory to keep per-context
seqnos and dependencies).
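
For context, the gen6/7 scheme being removed synchronises engines
through exactly that global register set: the signaller broadcasts its
global_seqno into a per-engine-pair MMIO mailbox (GEN6_VRSYNC and
friends), and the waiter emits an MI_SEMAPHORE_MBOX poll against it.
A minimal sketch, distilled from the gen6_signal()/gen6_ring_sync_to()
hunks below (the emitted dwords only, not a complete function):

    /* Signaller side: broadcast the request's global seqno into the
     * peer engine's mailbox register via MI_LOAD_REGISTER_IMM. The
     * mailbox (e.g. GEN6_VRSYNC) is a single global MMIO register,
     * so only one in-order seqno stream per engine can be tracked. */
    *cs++ = MI_LOAD_REGISTER_IMM(1);
    *cs++ = i915_mmio_reg_offset(mbox_reg);
    *cs++ = rq->global_seqno;

    /* Waiter side: stall the ring until the mailbox value passes the
     * signaller's seqno. The HW comparison is strictly greater-than,
     * hence the seqno - 1. */
    *cs++ = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
            MI_SEMAPHORE_REGISTER | wait_mbox;
    *cs++ = signal->global_seqno - 1;
    *cs++ = 0;
    *cs++ = MI_NOOP;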

On the positive side, when we implement request reordering for gen7 we
will likewise be unable to presume a simple execution queue, and so
would have had to remove the current semaphore generation code anyway.
This brings us another step closer to request reordering for ringbuffer
submission!

On the negative side, using interrupts to drive inter-engine
synchronisation is much slower (4us -> 15us to do a nop on each of the
3 engines on ivb). This is still much better than it was at the time
the HW semaphores were introduced, and, equally importantly, userspace
has since weaned itself off intermixing dependent BLT/RENDER operations
(the prime culprit was glyph rendering in UXA). So while we regress the
microbenchmarks, it should not impact the user.
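
With the semaphore branch gone, every inter-engine dependency funnels
through the software fence path instead, so i915_request_await_request()
reduces to roughly the following (see the hunk below; the same-engine
guard is assumed from the function's surrounding context):

    if (to->engine == from->engine) {
            /* Same ring: ordering is implicit, so just chain the
             * submit fences of the two requests together. */
            ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
                                                   &from->submit,
                                                   I915_FENCE_GFP);
    } else {
            /* Cross-engine: wait on the signaller's dma_fence, whose
             * completion is driven by the breadcrumb interrupt. */
            ret = i915_sw_fence_await_dma_fence(&to->submit,
                                                &from->fence, 0,
                                                I915_FENCE_GFP);
    }
    return ret < 0 ? ret : 0;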

References: https://bugs.freedesktop.org/show_bug.cgi?id=108888
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181228140736.32606-2-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_timeline.h
drivers/gpu/drm/i915/i915_trace.h
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_hangcheck.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index 2d29ce630c0e51146c983becfbe80f816ffff80d..1269d734ade071e83edf86a1bec6ce7cebd7d2ae 100644 (file)
@@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = {
 static int
 i915_next_seqno_set(void *data, u64 val)
 {
-       struct drm_i915_private *dev_priv = data;
-       struct drm_device *dev = &dev_priv->drm;
-       int ret;
-
-       ret = mutex_lock_interruptible(&dev->struct_mutex);
-       if (ret)
-               return ret;
-
-       intel_runtime_pm_get(dev_priv);
-       ret = i915_gem_set_global_seqno(dev, val);
-       intel_runtime_pm_put(dev_priv);
-
-       mutex_unlock(&dev->struct_mutex);
-
-       return ret;
+       return val ? 0 : -EINVAL;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
@@ -4101,9 +4087,6 @@ i915_drop_caches_set(void *data, u64 val)
                                                     I915_WAIT_LOCKED,
                                                     MAX_SCHEDULE_TIMEOUT);
 
-               if (ret == 0 && val & DROP_RESET_SEQNO)
-                       ret = i915_gem_set_global_seqno(&i915->drm, 1);
-
                if (val & DROP_RETIRE)
                        i915_retire_requests(i915);
 
index caa055ac947223ef08b27794d4c3d81b3155040b..dcb935338c6305a59440dc8037634a366287d8f5 100644 (file)
@@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
                value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL);
                break;
        case I915_PARAM_HAS_SEMAPHORES:
-               value = HAS_LEGACY_SEMAPHORES(dev_priv);
+               value = 0;
                break;
        case I915_PARAM_HAS_SECURE_BATCHES:
                value = capable(CAP_SYS_ADMIN);
index 936ec09c94906265c09df34bf71b10858068279a..287f06b9e95a227820952476ba1fbd49f133b700 100644 (file)
@@ -1948,7 +1948,6 @@ struct drm_i915_private {
                struct list_head active_rings;
                struct list_head closed_vma;
                u32 active_requests;
-               u32 request_serial;
 
                /**
                 * Is the GPU currently considered idle, or busy executing
@@ -2396,8 +2395,6 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_BLT(dev_priv)      HAS_ENGINE(dev_priv, BCS)
 #define HAS_VEBOX(dev_priv)    HAS_ENGINE(dev_priv, VECS)
 
-#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN(dev_priv, 7)
-
 #define HAS_LLC(dev_priv)      ((dev_priv)->info.has_llc)
 #define HAS_SNOOP(dev_priv)    ((dev_priv)->info.has_snoop)
 #define HAS_EDRAM(dev_priv)    (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
index 39ee67e01bb783ebc2a193642ca7fd11c4e131e5..9e65c37daa9c2c451e8c0f5b0276bdafe7f2aaeb 100644 (file)
@@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request)
 
        spin_lock_irqsave(&request->engine->timeline.lock, flags);
        __i915_request_submit(request);
-       intel_engine_init_global_seqno(request->engine, request->global_seqno);
+       intel_engine_write_global_seqno(request->engine, request->global_seqno);
        spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
 }
 
@@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 
        /*
         * Make sure no request can slip through without getting completed by
-        * either this call here to intel_engine_init_global_seqno, or the one
+        * either this call here to intel_engine_write_global_seqno, or the one
         * in nop_submit_request.
         */
        synchronize_rcu();
index 8ab8e8e6a086ae5e9493857d0c854447c89ebb75..2cd1f51963f3dd3ac71730adff32d2efb9b1f4f7 100644 (file)
@@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request)
        spin_unlock(&file_priv->mm.lock);
 }
 
-static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
+static void reserve_gt(struct drm_i915_private *i915)
 {
-       struct intel_engine_cs *engine;
-       struct i915_timeline *timeline;
-       enum intel_engine_id id;
-       int ret;
-
-       /* Carefully retire all requests without writing to the rings */
-       ret = i915_gem_wait_for_idle(i915,
-                                    I915_WAIT_INTERRUPTIBLE |
-                                    I915_WAIT_LOCKED,
-                                    MAX_SCHEDULE_TIMEOUT);
-       if (ret)
-               return ret;
-
-       GEM_BUG_ON(i915->gt.active_requests);
-
-       /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-       for_each_engine(engine, i915, id) {
-               GEM_TRACE("%s seqno %d (current %d) -> %d\n",
-                         engine->name,
-                         engine->timeline.seqno,
-                         intel_engine_get_seqno(engine),
-                         seqno);
-
-               if (seqno == engine->timeline.seqno)
-                       continue;
-
-               kthread_park(engine->breadcrumbs.signaler);
-
-               if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
-                       /* Flush any waiters before we reuse the seqno */
-                       intel_engine_disarm_breadcrumbs(engine);
-                       intel_engine_init_hangcheck(engine);
-                       GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
-               }
-
-               /* Check we are idle before we fiddle with hw state! */
-               GEM_BUG_ON(!intel_engine_is_idle(engine));
-               GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));
-
-               /* Finally reset hw state */
-               intel_engine_init_global_seqno(engine, seqno);
-               engine->timeline.seqno = seqno;
-
-               kthread_unpark(engine->breadcrumbs.signaler);
-       }
-
-       list_for_each_entry(timeline, &i915->gt.timelines, link)
-               memset(timeline->global_sync, 0, sizeof(timeline->global_sync));
-
-       i915->gt.request_serial = seqno;
-
-       return 0;
-}
-
-int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
-{
-       struct drm_i915_private *i915 = to_i915(dev);
-
-       lockdep_assert_held(&i915->drm.struct_mutex);
-
-       if (seqno == 0)
-               return -EINVAL;
-
-       /* HWS page needs to be set less than what we will inject to ring */
-       return reset_all_global_seqno(i915, seqno - 1);
-}
-
-static int reserve_gt(struct drm_i915_private *i915)
-{
-       int ret;
-
-       /*
-        * Reservation is fine until we may need to wrap around
-        *
-        * By incrementing the serial for every request, we know that no
-        * individual engine may exceed that serial (as each is reset to 0
-        * on any wrap). This protects even the most pessimistic of migrations
-        * of every request from all engines onto just one.
-        */
-       while (unlikely(++i915->gt.request_serial == 0)) {
-               ret = reset_all_global_seqno(i915, 0);
-               if (ret) {
-                       i915->gt.request_serial--;
-                       return ret;
-               }
-       }
-
        if (!i915->gt.active_requests++)
                i915_gem_unpark(i915);
-
-       return 0;
 }
 
 static void unreserve_gt(struct drm_i915_private *i915)
@@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
        if (IS_ERR(ce))
                return ERR_CAST(ce);
 
-       ret = reserve_gt(i915);
-       if (ret)
-               goto err_unpin;
+       reserve_gt(i915);
 
        ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
        if (ret)
@@ -743,7 +652,6 @@ err_unwind:
        kmem_cache_free(i915->requests, rq);
 err_unreserve:
        unreserve_gt(i915);
-err_unpin:
        intel_context_unpin(ce);
        return ERR_PTR(ret);
 }
@@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
                ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
                                                       &from->submit,
                                                       I915_FENCE_GFP);
-               return ret < 0 ? ret : 0;
-       }
-
-       if (to->engine->semaphore.sync_to) {
-               u32 seqno;
-
-               GEM_BUG_ON(!from->engine->semaphore.signal);
-
-               seqno = i915_request_global_seqno(from);
-               if (!seqno)
-                       goto await_dma_fence;
-
-               if (seqno <= to->timeline->global_sync[from->engine->id])
-                       return 0;
-
-               trace_i915_gem_ring_sync_to(to, from);
-               ret = to->engine->semaphore.sync_to(to, from);
-               if (ret)
-                       return ret;
-
-               to->timeline->global_sync[from->engine->id] = seqno;
-               return 0;
+       } else {
+               ret = i915_sw_fence_await_dma_fence(&to->submit,
+                                                   &from->fence, 0,
+                                                   I915_FENCE_GFP);
        }
 
-await_dma_fence:
-       ret = i915_sw_fence_await_dma_fence(&to->submit,
-                                           &from->fence, 0,
-                                           I915_FENCE_GFP);
        return ret < 0 ? ret : 0;
 }
 
index ebd71b487220aec95a2bfdeb1c924f0836ae64c1..38c1e15e927a82297f5f9eb43bebed85eccf062b 100644 (file)
@@ -63,14 +63,6 @@ struct i915_timeline {
         * redundant and we can discard it without loss of generality.
         */
        struct i915_syncmap *sync;
-       /**
-        * Separately to the inter-context seqno map above, we track the last
-        * barrier (e.g. semaphore wait) to the global engine timelines. Note
-        * that this tracks global_seqno rather than the context.seqno, and
-        * so it is subject to the limitations of hw wraparound and that we
-        * may need to revoke global_seqno (on pre-emption).
-        */
-       u32 global_sync[I915_NUM_ENGINES];
 
        struct list_head link;
        const char *name;
index b50c6b829715e220c9f3edede3dfa0e83497a804..5cf378936b05c62fcaaaf3112cb7291a053287d0 100644 (file)
@@ -585,35 +585,6 @@ TRACE_EVENT(i915_gem_evict_vm,
            TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
 );
 
-TRACE_EVENT(i915_gem_ring_sync_to,
-           TP_PROTO(struct i915_request *to, struct i915_request *from),
-           TP_ARGS(to, from),
-
-           TP_STRUCT__entry(
-                            __field(u32, dev)
-                            __field(u32, from_class)
-                            __field(u32, from_instance)
-                            __field(u32, to_class)
-                            __field(u32, to_instance)
-                            __field(u32, seqno)
-                            ),
-
-           TP_fast_assign(
-                          __entry->dev = from->i915->drm.primary->index;
-                          __entry->from_class = from->engine->uabi_class;
-                          __entry->from_instance = from->engine->instance;
-                          __entry->to_class = to->engine->uabi_class;
-                          __entry->to_instance = to->engine->instance;
-                          __entry->seqno = from->global_seqno;
-                          ),
-
-           TP_printk("dev=%u, sync-from=%u:%u, sync-to=%u:%u, seqno=%u",
-                     __entry->dev,
-                     __entry->from_class, __entry->from_instance,
-                     __entry->to_class, __entry->to_instance,
-                     __entry->seqno)
-);
-
 TRACE_EVENT(i915_request_queue,
            TP_PROTO(struct i915_request *rq, u32 flags),
            TP_ARGS(rq, flags),
index 561b474cbab13f52a5d80b858f6b8ddcdf2de3b4..78fc777c4bf49137aa9dbee16edfc934b2595243 100644 (file)
@@ -454,25 +454,8 @@ cleanup:
        return err;
 }
 
-void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
+void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
-       struct drm_i915_private *dev_priv = engine->i915;
-
-       /* Our semaphore implementation is strictly monotonic (i.e. we proceed
-        * so long as the semaphore value in the register/page is greater
-        * than the sync value), so whenever we reset the seqno,
-        * so long as we reset the tracking semaphore value to 0, it will
-        * always be before the next request's seqno. If we don't reset
-        * the semaphore value, then when the seqno moves backwards all
-        * future waits will complete instantly (causing rendering corruption).
-        */
-       if (IS_GEN_RANGE(dev_priv, 6, 7)) {
-               I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
-               I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
-               if (HAS_VEBOX(dev_priv))
-                       I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
-       }
-
        intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
        clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
@@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
                drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
        }
 
-       if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
-               drm_printf(m, "\tSYNC_0: 0x%08x\n",
-                          I915_READ(RING_SYNC_0(engine->mmio_base)));
-               drm_printf(m, "\tSYNC_1: 0x%08x\n",
-                          I915_READ(RING_SYNC_1(engine->mmio_base)));
-               if (HAS_VEBOX(dev_priv))
-                       drm_printf(m, "\tSYNC_2: 0x%08x\n",
-                                  I915_READ(RING_SYNC_2(engine->mmio_base)));
-       }
-
        addr = intel_engine_get_active_head(engine);
        drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
                   upper_32_bits(addr), lower_32_bits(addr));
index 495fa145f37f6d1e868a2698ec6cd8d8007c8de8..c3f929f594247f34b54ed5e45ed90535c23617d9 100644 (file)
 
 #include "i915_drv.h"
 
-static bool
-ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
-{
-       ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
-       return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
-                        MI_SEMAPHORE_REGISTER);
-}
-
-static struct intel_engine_cs *
-semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
-                                u64 offset)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-       u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
-       struct intel_engine_cs *signaller;
-       enum intel_engine_id id;
-
-       for_each_engine(signaller, dev_priv, id) {
-               if (engine == signaller)
-                       continue;
-
-               if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
-                       return signaller;
-       }
-
-       DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
-                        engine->name, ipehr);
-
-       return ERR_PTR(-ENODEV);
-}
-
-static struct intel_engine_cs *
-semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-       void __iomem *vaddr;
-       u32 cmd, ipehr, head;
-       u64 offset = 0;
-       int i, backwards;
-
-       /*
-        * This function does not support execlist mode - any attempt to
-        * proceed further into this function will result in a kernel panic
-        * when dereferencing ring->buffer, which is not set up in execlist
-        * mode.
-        *
-        * The correct way of doing it would be to derive the currently
-        * executing ring buffer from the current context, which is derived
-        * from the currently running request. Unfortunately, to get the
-        * current request we would have to grab the struct_mutex before doing
-        * anything else, which would be ill-advised since some other thread
-        * might have grabbed it already and managed to hang itself, causing
-        * the hang checker to deadlock.
-        *
-        * Therefore, this function does not support execlist mode in its
-        * current form. Just return NULL and move on.
-        */
-       if (engine->buffer == NULL)
-               return NULL;
-
-       ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
-       if (!ipehr_is_semaphore_wait(engine, ipehr))
-               return NULL;
-
-       /*
-        * HEAD is likely pointing to the dword after the actual command,
-        * so scan backwards until we find the MBOX. But limit it to just 3
-        * or 4 dwords depending on the semaphore wait command size.
-        * Note that we don't care about ACTHD here since that might
-        * point at a batch, and semaphores are always emitted into the
-        * ringbuffer itself.
-        */
-       head = I915_READ_HEAD(engine) & HEAD_ADDR;
-       backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
-       vaddr = (void __iomem *)engine->buffer->vaddr;
-
-       for (i = backwards; i; --i) {
-               /*
-                * Be paranoid and presume the hw has gone off into the wild -
-                * our ring is smaller than what the hardware (and hence
-                * HEAD_ADDR) allows. Also handles wrap-around.
-                */
-               head &= engine->buffer->size - 1;
-
-               /* This here seems to blow up */
-               cmd = ioread32(vaddr + head);
-               if (cmd == ipehr)
-                       break;
-
-               head -= 4;
-       }
-
-       if (!i)
-               return NULL;
-
-       *seqno = ioread32(vaddr + head + 4) + 1;
-       return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
-}
-
-static int semaphore_passed(struct intel_engine_cs *engine)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-       struct intel_engine_cs *signaller;
-       u32 seqno;
-
-       engine->hangcheck.deadlock++;
-
-       signaller = semaphore_waits_for(engine, &seqno);
-       if (signaller == NULL)
-               return -1;
-
-       if (IS_ERR(signaller))
-               return 0;
-
-       /* Prevent pathological recursion due to driver bugs */
-       if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
-               return -1;
-
-       if (intel_engine_signaled(signaller, seqno))
-               return 1;
-
-       /* cursory check for an unkickable deadlock */
-       if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE &&
-           semaphore_passed(signaller) < 0)
-               return -1;
-
-       return 0;
-}
-
-static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
-{
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-
-       for_each_engine(engine, dev_priv, id)
-               engine->hangcheck.deadlock = 0;
-}
-
 static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
 {
        u32 tmp = current_instdone | *old_instdone;
@@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
                return ENGINE_WAIT_KICK;
        }
 
-       if (IS_GEN_RANGE(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
-               switch (semaphore_passed(engine)) {
-               default:
-                       return ENGINE_DEAD;
-               case 1:
-                       i915_handle_error(dev_priv, ALL_ENGINES, 0,
-                                         "stuck semaphore on %s",
-                                         engine->name);
-                       I915_WRITE_CTL(engine, tmp);
-                       return ENGINE_WAIT_KICK;
-               case 0:
-                       return ENGINE_WAIT;
-               }
-       }
-
        return ENGINE_DEAD;
 }
 
@@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
        for_each_engine(engine, dev_priv, id) {
                struct intel_engine_hangcheck hc;
 
-               semaphore_clear_deadlocks(dev_priv);
-
                hangcheck_load_sample(engine, &hc);
                hangcheck_accumulate_sample(engine, &hc);
                hangcheck_store_sample(engine, &hc);
index 1102c2e98222c60d2c93fd648257ed5e5fa613fb..588294a3bbd2f71a3f1b5946e4dc1692713c2a52 100644 (file)
@@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
 
        intel_engine_reset_breadcrumbs(engine);
 
-       if (HAS_LEGACY_SEMAPHORES(engine->i915)) {
-               I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
-               I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
-               if (HAS_VEBOX(dev_priv))
-                       I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
-       }
-
        /* Enforce ordering by reading HEAD register back */
        I915_READ_HEAD(engine);
 
@@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
        return 0;
 }
 
-static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
-{
-       struct drm_i915_private *dev_priv = rq->i915;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       int num_rings = 0;
-
-       for_each_engine(engine, dev_priv, id) {
-               i915_reg_t mbox_reg;
-
-               if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
-                       continue;
-
-               mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
-               if (i915_mmio_reg_valid(mbox_reg)) {
-                       *cs++ = MI_LOAD_REGISTER_IMM(1);
-                       *cs++ = i915_mmio_reg_offset(mbox_reg);
-                       *cs++ = rq->global_seqno;
-                       num_rings++;
-               }
-       }
-       if (num_rings & 1)
-               *cs++ = MI_NOOP;
-
-       return cs;
-}
-
 static void cancel_requests(struct intel_engine_cs *engine)
 {
        struct i915_request *request;
@@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
 static const int i9xx_emit_breadcrumb_sz = 4;
 
-static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-       return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
-}
-
-static int
-gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
-{
-       u32 dw1 = MI_SEMAPHORE_MBOX |
-                 MI_SEMAPHORE_COMPARE |
-                 MI_SEMAPHORE_REGISTER;
-       u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
-       u32 *cs;
-
-       WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
-
-       cs = intel_ring_begin(rq, 4);
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
-
-       *cs++ = dw1 | wait_mbox;
-       /* Throughout all of the GEM code, seqno passed implies our current
-        * seqno is >= the last seqno executed. However for hardware the
-        * comparison is strictly greater than.
-        */
-       *cs++ = signal->global_seqno - 1;
-       *cs++ = 0;
-       *cs++ = MI_NOOP;
-       intel_ring_advance(rq, cs);
-
-       return 0;
-}
-
 static void
 gen5_seqno_barrier(struct intel_engine_cs *engine)
 {
@@ -2151,66 +2084,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
        return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
 }
 
-static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
-                                      struct intel_engine_cs *engine)
-{
-       int i;
-
-       if (!HAS_LEGACY_SEMAPHORES(dev_priv))
-               return;
-
-       GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
-       engine->semaphore.sync_to = gen6_ring_sync_to;
-       engine->semaphore.signal = gen6_signal;
-
-       /*
-        * The current semaphore is only applied on pre-gen8
-        * platform.  And there is no VCS2 ring on the pre-gen8
-        * platform. So the semaphore between RCS and VCS2 is
-        * initialized as INVALID.
-        */
-       for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
-               static const struct {
-                       u32 wait_mbox;
-                       i915_reg_t mbox_reg;
-               } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
-                       [RCS_HW] = {
-                               [VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
-                               [BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
-                               [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
-                       },
-                       [VCS_HW] = {
-                               [RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
-                               [BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
-                               [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
-                       },
-                       [BCS_HW] = {
-                               [RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
-                               [VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
-                               [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
-                       },
-                       [VECS_HW] = {
-                               [RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
-                               [VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
-                               [BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
-                       },
-               };
-               u32 wait_mbox;
-               i915_reg_t mbox_reg;
-
-               if (i == engine->hw_id) {
-                       wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
-                       mbox_reg = GEN6_NOSYNC;
-               } else {
-                       wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
-                       mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
-               }
-
-               engine->semaphore.mbox.wait[i] = wait_mbox;
-               engine->semaphore.mbox.signal[i] = mbox_reg;
-       }
-}
-
 static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
                                struct intel_engine_cs *engine)
 {
@@ -2253,7 +2126,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
        GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
 
        intel_ring_init_irq(dev_priv, engine);
-       intel_ring_init_semaphores(dev_priv, engine);
 
        engine->init_hw = init_ring_common;
        engine->reset.prepare = reset_prepare;
@@ -2265,16 +2137,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 
        engine->emit_breadcrumb = i9xx_emit_breadcrumb;
        engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
-       if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
-               int num_rings;
-
-               engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
-
-               num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
-               engine->emit_breadcrumb_sz += num_rings * 3;
-               if (num_rings & 1)
-                       engine->emit_breadcrumb_sz++;
-       }
 
        engine->set_default_submission = i9xx_set_default_submission;
 
index 6b41b9ce5f5b258711f25303d8e487b6a4f73ade..c927bdfb1ed0c14013b41ab1a752fa51dba06730 100644 (file)
@@ -510,60 +510,6 @@ struct intel_engine_cs {
        void            (*irq_seqno_barrier)(struct intel_engine_cs *engine);
        void            (*cleanup)(struct intel_engine_cs *engine);
 
-       /* GEN8 signal/wait table - never trust comments!
-        *        signal to     signal to    signal to   signal to      signal to
-        *          RCS            VCS          BCS        VECS          VCS2
-        *      --------------------------------------------------------------------
-        *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
-        *      |-------------------------------------------------------------------
-        *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
-        *      |-------------------------------------------------------------------
-        *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
-        *      |-------------------------------------------------------------------
-        * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
-        *      |-------------------------------------------------------------------
-        * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
-        *      |-------------------------------------------------------------------
-        *
-        * Generalization:
-        *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
-        *  ie. transpose of g(x, y)
-        *
-        *       sync from      sync from    sync from    sync from     sync from
-        *          RCS            VCS          BCS        VECS          VCS2
-        *      --------------------------------------------------------------------
-        *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
-        *      |-------------------------------------------------------------------
-        *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
-        *      |-------------------------------------------------------------------
-        *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
-        *      |-------------------------------------------------------------------
-        * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
-        *      |-------------------------------------------------------------------
-        * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
-        *      |-------------------------------------------------------------------
-        *
-        * Generalization:
-        *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
-        *  ie. transpose of f(x, y)
-        */
-       struct {
-#define GEN6_SEMAPHORE_LAST    VECS_HW
-#define GEN6_NUM_SEMAPHORES    (GEN6_SEMAPHORE_LAST + 1)
-#define GEN6_SEMAPHORES_MASK   GENMASK(GEN6_SEMAPHORE_LAST, 0)
-               struct {
-                       /* our mbox written by others */
-                       u32             wait[GEN6_NUM_SEMAPHORES];
-                       /* mboxes this ring signals to */
-                       i915_reg_t      signal[GEN6_NUM_SEMAPHORES];
-               } mbox;
-
-               /* AKA wait() */
-               int     (*sync_to)(struct i915_request *rq,
-                                  struct i915_request *signal);
-               u32     *(*signal)(struct i915_request *rq, u32 *cs);
-       } semaphore;
-
        struct intel_engine_execlists execlists;
 
        /* Contexts are pinned whilst they are active on the GPU. The last
@@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
        return tail;
 }
 
-void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);