From 6faf5916e6beb0dedb0fcbbafbaa152adeaea758 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Dec 2018 14:07:35 +0000 Subject: [PATCH] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation The writing is on the wall for the existence of a single execution queue along each engine, and as a consequence we will not be able to track dependencies along the HW queue itself, i.e. we will not be able to use HW semaphores on gen7 as they use a global set of registers (and unlike gen8+ we can not effectively target memory to keep per-context seqno and dependencies). On the positive side, when we implement request reordering for gen7 we also can not presume a simple execution queue and would also require removing the current semaphore generation code. So this bring us another step closer to request reordering for ringbuffer submission! The negative side is that using interrupts to drive inter-engine synchronisation is much slower (4us -> 15us to do a nop on each of the 3 engines on ivb). This is much better than it was at the time of introducing the HW semaphores and equally important userspace weaned itself off intermixing dependent BLT/RENDER operations (the prime culprit was glyph rendering in UXA). So while we regress the microbenchmarks, it should not impact the user. References: https://bugs.freedesktop.org/show_bug.cgi?id=108888 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20181228140736.32606-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 19 +-- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 3 - drivers/gpu/drm/i915/i915_gem.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 126 +------------------ drivers/gpu/drm/i915/i915_timeline.h | 8 -- drivers/gpu/drm/i915/i915_trace.h | 29 ----- drivers/gpu/drm/i915/intel_engine_cs.c | 29 +---- drivers/gpu/drm/i915/intel_hangcheck.c | 155 ------------------------ drivers/gpu/drm/i915/intel_ringbuffer.c | 138 --------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 56 +-------- 11 files changed, 12 insertions(+), 557 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2d29ce630c0e..1269d734ade0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = { static int i915_next_seqno_set(void *data, u64 val) { - struct drm_i915_private *dev_priv = data; - struct drm_device *dev = &dev_priv->drm; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - intel_runtime_pm_get(dev_priv); - ret = i915_gem_set_global_seqno(dev, val); - intel_runtime_pm_put(dev_priv); - - mutex_unlock(&dev->struct_mutex); - - return ret; + return val ? 0 : -EINVAL; } DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, @@ -4101,9 +4087,6 @@ i915_drop_caches_set(void *data, u64 val) I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - if (ret == 0 && val & DROP_RESET_SEQNO) - ret = i915_gem_set_global_seqno(&i915->drm, 1); - if (val & DROP_RETIRE) i915_retire_requests(i915); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index caa055ac9472..dcb935338c63 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL); break; case I915_PARAM_HAS_SEMAPHORES: - value = HAS_LEGACY_SEMAPHORES(dev_priv); + value = 0; break; case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 936ec09c9490..287f06b9e95a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1948,7 +1948,6 @@ struct drm_i915_private { struct list_head active_rings; struct list_head closed_vma; u32 active_requests; - u32 request_serial; /** * Is the GPU currently considered idle, or busy executing @@ -2396,8 +2395,6 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) -#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN(dev_priv, 7) - #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 39ee67e01bb7..9e65c37daa9c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request) spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); - intel_engine_init_global_seqno(request->engine, request->global_seqno); + intel_engine_write_global_seqno(request->engine, request->global_seqno); spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } @@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) /* * Make sure no request can slip through without getting completed by - * either this call here to intel_engine_init_global_seqno, or the one + * either this call here to intel_engine_write_global_seqno, or the one * in nop_submit_request. */ synchronize_rcu(); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 8ab8e8e6a086..2cd1f51963f3 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) +static void reserve_gt(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - struct i915_timeline *timeline; - enum intel_engine_id id; - int ret; - - /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - - GEM_BUG_ON(i915->gt.active_requests); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - for_each_engine(engine, i915, id) { - GEM_TRACE("%s seqno %d (current %d) -> %d\n", - engine->name, - engine->timeline.seqno, - intel_engine_get_seqno(engine), - seqno); - - if (seqno == engine->timeline.seqno) - continue; - - kthread_park(engine->breadcrumbs.signaler); - - if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { - /* Flush any waiters before we reuse the seqno */ - intel_engine_disarm_breadcrumbs(engine); - intel_engine_init_hangcheck(engine); - GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); - } - - /* Check we are idle before we fiddle with hw state! */ - GEM_BUG_ON(!intel_engine_is_idle(engine)); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); - - /* Finally reset hw state */ - intel_engine_init_global_seqno(engine, seqno); - engine->timeline.seqno = seqno; - - kthread_unpark(engine->breadcrumbs.signaler); - } - - list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); - - i915->gt.request_serial = seqno; - - return 0; -} - -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *i915 = to_i915(dev); - - lockdep_assert_held(&i915->drm.struct_mutex); - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we will inject to ring */ - return reset_all_global_seqno(i915, seqno - 1); -} - -static int reserve_gt(struct drm_i915_private *i915) -{ - int ret; - - /* - * Reservation is fine until we may need to wrap around - * - * By incrementing the serial for every request, we know that no - * individual engine may exceed that serial (as each is reset to 0 - * on any wrap). This protects even the most pessimistic of migrations - * of every request from all engines onto just one. - */ - while (unlikely(++i915->gt.request_serial == 0)) { - ret = reset_all_global_seqno(i915, 0); - if (ret) { - i915->gt.request_serial--; - return ret; - } - } - if (!i915->gt.active_requests++) i915_gem_unpark(i915); - - return 0; } static void unreserve_gt(struct drm_i915_private *i915) @@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (IS_ERR(ce)) return ERR_CAST(ce); - ret = reserve_gt(i915); - if (ret) - goto err_unpin; + reserve_gt(i915); ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST); if (ret) @@ -743,7 +652,6 @@ err_unwind: kmem_cache_free(i915->requests, rq); err_unreserve: unreserve_gt(i915); -err_unpin: intel_context_unpin(ce); return ERR_PTR(ret); } @@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, I915_FENCE_GFP); - return ret < 0 ? ret : 0; - } - - if (to->engine->semaphore.sync_to) { - u32 seqno; - - GEM_BUG_ON(!from->engine->semaphore.signal); - - seqno = i915_request_global_seqno(from); - if (!seqno) - goto await_dma_fence; - - if (seqno <= to->timeline->global_sync[from->engine->id]) - return 0; - - trace_i915_gem_ring_sync_to(to, from); - ret = to->engine->semaphore.sync_to(to, from); - if (ret) - return ret; - - to->timeline->global_sync[from->engine->id] = seqno; - return 0; + } else { + ret = i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + I915_FENCE_GFP); } -await_dma_fence: - ret = i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - I915_FENCE_GFP); return ret < 0 ? ret : 0; } diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index ebd71b487220..38c1e15e927a 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -63,14 +63,6 @@ struct i915_timeline { * redundant and we can discard it without loss of generality. */ struct i915_syncmap *sync; - /** - * Separately to the inter-context seqno map above, we track the last - * barrier (e.g. semaphore wait) to the global engine timelines. Note - * that this tracks global_seqno rather than the context.seqno, and - * so it is subject to the limitations of hw wraparound and that we - * may need to revoke global_seqno (on pre-emption). - */ - u32 global_sync[I915_NUM_ENGINES]; struct list_head link; const char *name; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index b50c6b829715..5cf378936b05 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -585,35 +585,6 @@ TRACE_EVENT(i915_gem_evict_vm, TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) ); -TRACE_EVENT(i915_gem_ring_sync_to, - TP_PROTO(struct i915_request *to, struct i915_request *from), - TP_ARGS(to, from), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, from_class) - __field(u32, from_instance) - __field(u32, to_class) - __field(u32, to_instance) - __field(u32, seqno) - ), - - TP_fast_assign( - __entry->dev = from->i915->drm.primary->index; - __entry->from_class = from->engine->uabi_class; - __entry->from_instance = from->engine->instance; - __entry->to_class = to->engine->uabi_class; - __entry->to_instance = to->engine->instance; - __entry->seqno = from->global_seqno; - ), - - TP_printk("dev=%u, sync-from=%u:%u, sync-to=%u:%u, seqno=%u", - __entry->dev, - __entry->from_class, __entry->from_instance, - __entry->to_class, __entry->to_instance, - __entry->seqno) -); - TRACE_EVENT(i915_request_queue, TP_PROTO(struct i915_request *rq, u32 flags), TP_ARGS(rq, flags), diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 561b474cbab1..78fc777c4bf4 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -454,25 +454,8 @@ cleanup: return err; } -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) { - struct drm_i915_private *dev_priv = engine->i915; - - /* Our semaphore implementation is strictly monotonic (i.e. we proceed - * so long as the semaphore value in the register/page is greater - * than the sync value), so whenever we reset the seqno, - * so long as we reset the tracking semaphore value to 0, it will - * always be before the next request's seqno. If we don't reset - * the semaphore value, then when the seqno moves backwards all - * future waits will complete instantly (causing rendering corruption). - */ - if (IS_GEN_RANGE(dev_priv, 6, 7)) { - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); - if (HAS_VEBOX(dev_priv)) - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); - } - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); @@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine)); } - if (HAS_LEGACY_SEMAPHORES(dev_priv)) { - drm_printf(m, "\tSYNC_0: 0x%08x\n", - I915_READ(RING_SYNC_0(engine->mmio_base))); - drm_printf(m, "\tSYNC_1: 0x%08x\n", - I915_READ(RING_SYNC_1(engine->mmio_base))); - if (HAS_VEBOX(dev_priv)) - drm_printf(m, "\tSYNC_2: 0x%08x\n", - I915_READ(RING_SYNC_2(engine->mmio_base))); - } - addr = intel_engine_get_active_head(engine); drm_printf(m, "\tACTHD: 0x%08x_%08x\n", upper_32_bits(addr), lower_32_bits(addr)); diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 495fa145f37f..c3f929f59424 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -24,144 +24,6 @@ #include "i915_drv.h" -static bool -ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) -{ - ipehr &= ~MI_SEMAPHORE_SYNC_MASK; - return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | - MI_SEMAPHORE_REGISTER); -} - -static struct intel_engine_cs * -semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, - u64 offset) -{ - struct drm_i915_private *dev_priv = engine->i915; - u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; - struct intel_engine_cs *signaller; - enum intel_engine_id id; - - for_each_engine(signaller, dev_priv, id) { - if (engine == signaller) - continue; - - if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) - return signaller; - } - - DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n", - engine->name, ipehr); - - return ERR_PTR(-ENODEV); -} - -static struct intel_engine_cs * -semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) -{ - struct drm_i915_private *dev_priv = engine->i915; - void __iomem *vaddr; - u32 cmd, ipehr, head; - u64 offset = 0; - int i, backwards; - - /* - * This function does not support execlist mode - any attempt to - * proceed further into this function will result in a kernel panic - * when dereferencing ring->buffer, which is not set up in execlist - * mode. - * - * The correct way of doing it would be to derive the currently - * executing ring buffer from the current context, which is derived - * from the currently running request. Unfortunately, to get the - * current request we would have to grab the struct_mutex before doing - * anything else, which would be ill-advised since some other thread - * might have grabbed it already and managed to hang itself, causing - * the hang checker to deadlock. - * - * Therefore, this function does not support execlist mode in its - * current form. Just return NULL and move on. - */ - if (engine->buffer == NULL) - return NULL; - - ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); - if (!ipehr_is_semaphore_wait(engine, ipehr)) - return NULL; - - /* - * HEAD is likely pointing to the dword after the actual command, - * so scan backwards until we find the MBOX. But limit it to just 3 - * or 4 dwords depending on the semaphore wait command size. - * Note that we don't care about ACTHD here since that might - * point at at batch, and semaphores are always emitted into the - * ringbuffer itself. - */ - head = I915_READ_HEAD(engine) & HEAD_ADDR; - backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4; - vaddr = (void __iomem *)engine->buffer->vaddr; - - for (i = backwards; i; --i) { - /* - * Be paranoid and presume the hw has gone off into the wild - - * our ring is smaller than what the hardware (and hence - * HEAD_ADDR) allows. Also handles wrap-around. - */ - head &= engine->buffer->size - 1; - - /* This here seems to blow up */ - cmd = ioread32(vaddr + head); - if (cmd == ipehr) - break; - - head -= 4; - } - - if (!i) - return NULL; - - *seqno = ioread32(vaddr + head + 4) + 1; - return semaphore_wait_to_signaller_ring(engine, ipehr, offset); -} - -static int semaphore_passed(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_engine_cs *signaller; - u32 seqno; - - engine->hangcheck.deadlock++; - - signaller = semaphore_waits_for(engine, &seqno); - if (signaller == NULL) - return -1; - - if (IS_ERR(signaller)) - return 0; - - /* Prevent pathological recursion due to driver bugs */ - if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) - return -1; - - if (intel_engine_signaled(signaller, seqno)) - return 1; - - /* cursory check for an unkickable deadlock */ - if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE && - semaphore_passed(signaller) < 0) - return -1; - - return 0; -} - -static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - engine->hangcheck.deadlock = 0; -} - static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) { u32 tmp = current_instdone | *old_instdone; @@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) return ENGINE_WAIT_KICK; } - if (IS_GEN_RANGE(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) { - switch (semaphore_passed(engine)) { - default: - return ENGINE_DEAD; - case 1: - i915_handle_error(dev_priv, ALL_ENGINES, 0, - "stuck semaphore on %s", - engine->name); - I915_WRITE_CTL(engine, tmp); - return ENGINE_WAIT_KICK; - case 0: - return ENGINE_WAIT; - } - } - return ENGINE_DEAD; } @@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) for_each_engine(engine, dev_priv, id) { struct intel_engine_hangcheck hc; - semaphore_clear_deadlocks(dev_priv); - hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); hangcheck_store_sample(engine, &hc); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1102c2e98222..588294a3bbd2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine) intel_engine_reset_breadcrumbs(engine); - if (HAS_LEGACY_SEMAPHORES(engine->i915)) { - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); - if (HAS_VEBOX(dev_priv)) - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); - } - /* Enforce ordering by reading HEAD register back */ I915_READ_HEAD(engine); @@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine) return 0; } -static u32 *gen6_signal(struct i915_request *rq, u32 *cs) -{ - struct drm_i915_private *dev_priv = rq->i915; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int num_rings = 0; - - for_each_engine(engine, dev_priv, id) { - i915_reg_t mbox_reg; - - if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) - continue; - - mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id]; - if (i915_mmio_reg_valid(mbox_reg)) { - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(mbox_reg); - *cs++ = rq->global_seqno; - num_rings++; - } - } - if (num_rings & 1) - *cs++ = MI_NOOP; - - return cs; -} - static void cancel_requests(struct intel_engine_cs *engine) { struct i915_request *request; @@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) static const int i9xx_emit_breadcrumb_sz = 4; -static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs) -{ - return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs)); -} - -static int -gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal) -{ - u32 dw1 = MI_SEMAPHORE_MBOX | - MI_SEMAPHORE_COMPARE | - MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id]; - u32 *cs; - - WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = dw1 | wait_mbox; - /* Throughout all of the GEM code, seqno passed implies our current - * seqno is >= the last seqno executed. However for hardware the - * comparison is strictly greater than. - */ - *cs++ = signal->global_seqno - 1; - *cs++ = 0; - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - static void gen5_seqno_barrier(struct intel_engine_cs *engine) { @@ -2151,66 +2084,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode) return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); } -static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - int i; - - if (!HAS_LEGACY_SEMAPHORES(dev_priv)) - return; - - GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); - engine->semaphore.sync_to = gen6_ring_sync_to; - engine->semaphore.signal = gen6_signal; - - /* - * The current semaphore is only applied on pre-gen8 - * platform. And there is no VCS2 ring on the pre-gen8 - * platform. So the semaphore between RCS and VCS2 is - * initialized as INVALID. - */ - for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { - static const struct { - u32 wait_mbox; - i915_reg_t mbox_reg; - } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { - [RCS_HW] = { - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, - }, - [VCS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, - }, - [BCS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, - }, - [VECS_HW] = { - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, - }, - }; - u32 wait_mbox; - i915_reg_t mbox_reg; - - if (i == engine->hw_id) { - wait_mbox = MI_SEMAPHORE_SYNC_INVALID; - mbox_reg = GEN6_NOSYNC; - } else { - wait_mbox = sem_data[engine->hw_id][i].wait_mbox; - mbox_reg = sem_data[engine->hw_id][i].mbox_reg; - } - - engine->semaphore.mbox.wait[i] = wait_mbox; - engine->semaphore.mbox.signal[i] = mbox_reg; - } -} - static void intel_ring_init_irq(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { @@ -2253,7 +2126,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); intel_ring_init_irq(dev_priv, engine); - intel_ring_init_semaphores(dev_priv, engine); engine->init_hw = init_ring_common; engine->reset.prepare = reset_prepare; @@ -2265,16 +2137,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; - if (HAS_LEGACY_SEMAPHORES(dev_priv)) { - int num_rings; - - engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; - - num_rings = INTEL_INFO(dev_priv)->num_rings - 1; - engine->emit_breadcrumb_sz += num_rings * 3; - if (num_rings & 1) - engine->emit_breadcrumb_sz++; - } engine->set_default_submission = i9xx_set_default_submission; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6b41b9ce5f5b..c927bdfb1ed0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -510,60 +510,6 @@ struct intel_engine_cs { void (*irq_seqno_barrier)(struct intel_engine_cs *engine); void (*cleanup)(struct intel_engine_cs *engine); - /* GEN8 signal/wait table - never trust comments! - * signal to signal to signal to signal to signal to - * RCS VCS BCS VECS VCS2 - * -------------------------------------------------------------------- - * RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) | - * |------------------------------------------------------------------- - * VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) | - * |------------------------------------------------------------------- - * BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) | - * |------------------------------------------------------------------- - * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP (0x90) | VCS2 (0x98) | - * |------------------------------------------------------------------- - * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP (0xc0) | - * |------------------------------------------------------------------- - * - * Generalization: - * f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id) - * ie. transpose of g(x, y) - * - * sync from sync from sync from sync from sync from - * RCS VCS BCS VECS VCS2 - * -------------------------------------------------------------------- - * RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) | - * |------------------------------------------------------------------- - * VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) | - * |------------------------------------------------------------------- - * BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) | - * |------------------------------------------------------------------- - * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP (0x90) | VCS2 (0xb8) | - * |------------------------------------------------------------------- - * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP (0xc0) | - * |------------------------------------------------------------------- - * - * Generalization: - * g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id) - * ie. transpose of f(x, y) - */ - struct { -#define GEN6_SEMAPHORE_LAST VECS_HW -#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) -#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) - struct { - /* our mbox written by others */ - u32 wait[GEN6_NUM_SEMAPHORES]; - /* mboxes this ring signals to */ - i915_reg_t signal[GEN6_NUM_SEMAPHORES]; - } mbox; - - /* AKA wait() */ - int (*sync_to)(struct i915_request *rq, - struct i915_request *signal); - u32 *(*signal)(struct i915_request *rq, u32 *cs); - } semaphore; - struct intel_engine_execlists execlists; /* Contexts are pinned whilst they are active on the GPU. The last @@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) return tail; } -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); -- 2.30.2