drm/i915: Measure the required reserved size for request emission
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 25 Jan 2019 10:05:20 +0000 (10:05 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 25 Jan 2019 11:19:39 +0000 (11:19 +0000)
Instead of tediously and fragilely counting up the number of dwords
required to emit the breadcrumb to seal a request, fake a request and
measure it automatically once during engine setup.

The downside is that this requires a fair amount of mocking to create a
proper breadcrumb. Still, it should be less error-prone in future as the
breadcrumb size fluctuates!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190125100520.20163-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/i915/selftests/mock_engine.c

index 2f3c71f6d31323f67575dbaef38607d8056594a0..8f738a7cd117e007984cd40805db9921cff46574 100644 (file)
@@ -604,6 +604,47 @@ static void __intel_context_unpin(struct i915_gem_context *ctx,
        intel_context_unpin(to_intel_context(ctx, engine));
 }
 
+struct measure_breadcrumb {
+       struct i915_request rq;
+       struct i915_timeline timeline;
+       struct intel_ring ring;
+       u32 cs[1024];
+};
+
+static int measure_breadcrumb_sz(struct intel_engine_cs *engine)
+{
+       struct measure_breadcrumb *frame;
+       unsigned int dw;
+
+       GEM_BUG_ON(!engine->i915->gt.scratch);
+
+       frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+       if (!frame)
+               return -ENOMEM;
+
+       i915_timeline_init(engine->i915, &frame->timeline, "measure");
+
+       INIT_LIST_HEAD(&frame->ring.request_list);
+       frame->ring.timeline = &frame->timeline;
+       frame->ring.vaddr = frame->cs;
+       frame->ring.size = sizeof(frame->cs);
+       frame->ring.effective_size = frame->ring.size;
+       intel_ring_update_space(&frame->ring);
+
+       frame->rq.i915 = engine->i915;
+       frame->rq.engine = engine;
+       frame->rq.ring = &frame->ring;
+       frame->rq.timeline = &frame->timeline;
+
+       dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+       GEM_BUG_ON(dw != engine->emit_breadcrumb_sz);
+
+       i915_timeline_fini(&frame->timeline);
+       kfree(frame);
+
+       return dw;
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -657,8 +698,16 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
        if (ret)
                goto err_breadcrumbs;
 
+       ret = measure_breadcrumb_sz(engine);
+       if (ret < 0)
+               goto err_status_page;
+
+       engine->emit_breadcrumb_sz = ret;
+
        return 0;
 
+err_status_page:
+       cleanup_status_page(engine);
 err_breadcrumbs:
        intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
index 9155cc675924c987350fa850efff7d294e801ad0..d2299425cf2f3e4cc35a5e06033a09b9dcb1e7e2 100644 (file)
@@ -2051,15 +2051,17 @@ static int gen8_emit_flush_render(struct i915_request *request,
  * used as a workaround for not being allowed to do lite
  * restore with HEAD==TAIL (WaIdleLiteRestore).
  */
-static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
+static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 {
        /* Ensure there's always at least one preemption point per-request. */
        *cs++ = MI_ARB_CHECK;
        *cs++ = MI_NOOP;
        request->wa_tail = intel_ring_offset(request, cs);
+
+       return cs;
 }
 
-static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
 {
        /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
        BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
@@ -2071,11 +2073,11 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
        request->tail = intel_ring_offset(request, cs);
        assert_ring_tail_valid(request->ring, request->tail);
 
-       gen8_emit_wa_tail(request, cs);
+       return gen8_emit_wa_tail(request, cs);
 }
 static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
 
-static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
        /* We're using qword write, seqno should be aligned to 8 bytes. */
        BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
@@ -2095,7 +2097,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
        request->tail = intel_ring_offset(request, cs);
        assert_ring_tail_valid(request->ring, request->tail);
 
-       gen8_emit_wa_tail(request, cs);
+       return gen8_emit_wa_tail(request, cs);
 }
 static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
 
index e39e483d8d16b0bc224a97c4074f32cbb800e120..107c4934e2fad57f9c06d011dc036dd80e923d20 100644 (file)
@@ -299,7 +299,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
        return 0;
 }
 
-static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
        /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
        *cs++ = GFX_OP_PIPE_CONTROL(4);
@@ -327,6 +327,8 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);
+
+       return cs;
 }
 static const int gen6_rcs_emit_breadcrumb_sz = 14;
 
@@ -409,7 +411,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
        return 0;
 }
 
-static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
        *cs++ = GFX_OP_PIPE_CONTROL(4);
        *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -427,10 +429,12 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);
+
+       return cs;
 }
 static const int gen7_rcs_emit_breadcrumb_sz = 6;
 
-static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
        *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
        *cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
@@ -439,11 +443,13 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);
+
+       return cs;
 }
 static const int gen6_xcs_emit_breadcrumb_sz = 4;
 
 #define GEN7_XCS_WA 32
-static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
        int i;
 
@@ -466,6 +472,8 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);
+
+       return cs;
 }
 static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
 #undef GEN7_XCS_WA
@@ -861,7 +869,7 @@ static void i9xx_submit_request(struct i915_request *request)
                        intel_ring_set_tail(request->ring, request->tail));
 }
 
-static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
        *cs++ = MI_FLUSH;
 
@@ -874,11 +882,13 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);
+
+       return cs;
 }
 static const int i9xx_emit_breadcrumb_sz = 6;
 
 #define GEN5_WA_STORES 8 /* must be at least 1! */
-static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
        int i;
 
@@ -895,6 +905,8 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
        rq->tail = intel_ring_offset(rq, cs);
        assert_ring_tail_valid(rq->ring, rq->tail);
+
+       return cs;
 }
 static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
 #undef GEN5_WA_STORES
index c3ef0f9bf321a442bba48fdd78b533c1e4d071bc..479bd53d4ac6634eeb37bd7ffd6282723f8fb865 100644 (file)
@@ -470,7 +470,7 @@ struct intel_engine_cs {
                                         unsigned int dispatch_flags);
 #define I915_DISPATCH_SECURE BIT(0)
 #define I915_DISPATCH_PINNED BIT(1)
-       void            (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
+       u32             *(*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
        int             emit_breadcrumb_sz;
 
        /* Pass the request to the hardware queue (e.g. directly into
index 442ec2aeec81def7708a334f9d88261f541f7607..905318b7ae1898b600e1a4db185065516c98fbd5 100644 (file)
@@ -159,9 +159,9 @@ static int mock_emit_flush(struct i915_request *request,
        return 0;
 }
 
-static void mock_emit_breadcrumb(struct i915_request *request,
-                                u32 *flags)
+static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)
 {
+       return cs;
 }
 
 static void mock_submit_request(struct i915_request *request)