drm/i915: Flush GPU relocs harder for gen3
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 7 Dec 2018 13:40:37 +0000 (13:40 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Fri, 7 Dec 2018 15:15:24 +0000 (15:15 +0000)
Adding an extra MI_STORE_DWORD_IMM to the gpu relocation path for gen3
was good, but still not good enough. To survive 24+ hours under test we
needed to perform not one, not two but three extra store-dw. Doing so
for each GPU relocation was a little unsightly and since we need to
worry about userspace hitting the same issues, we should apply the dummy
store-dw into the EMIT_FLUSH.

Fixes: 7dd4f6729f92 ("drm/i915: Async GPU relocation processing")
References: 7fa28e146994 ("drm/i915: Write GPU relocs harder with gen3")
Testcase: igt/gem_tiled_fence_blits # blb/pnv
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181207134037.11848-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.c

index 7b3ae2333dbf4cea0cb95c3ead0d9d3e53b4dfa3..1a1c04db6c80651e4bf3a88eb3000bf084caca47 100644 (file)
@@ -1268,7 +1268,7 @@ relocate_entry(struct i915_vma *vma,
                else if (gen >= 4)
                        len = 4;
                else
-                       len = 6;
+                       len = 3;
 
                batch = reloc_gpu(eb, vma, len);
                if (IS_ERR(batch))
@@ -1309,11 +1309,6 @@ relocate_entry(struct i915_vma *vma,
                        *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
                        *batch++ = addr;
                        *batch++ = target_offset;
-
-                       /* And again for good measure (blb/pnv) */
-                       *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-                       *batch++ = addr;
-                       *batch++ = target_offset;
                }
 
                goto out;
index 74a4d587c3129b9d8a91d15c8ba3cc7d94927077..02f6a9b8108384832b474841733c4e5c8b7169c2 100644 (file)
@@ -69,19 +69,28 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
 static int
 gen2_render_ring_flush(struct i915_request *rq, u32 mode)
 {
+       unsigned int num_store_dw;
        u32 cmd, *cs;
 
        cmd = MI_FLUSH;
-
+       num_store_dw = 0;
        if (mode & EMIT_INVALIDATE)
                cmd |= MI_READ_FLUSH;
+       if (mode & EMIT_FLUSH)
+               num_store_dw = 4;
 
-       cs = intel_ring_begin(rq, 2);
+       cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
        if (IS_ERR(cs))
                return PTR_ERR(cs);
 
        *cs++ = cmd;
-       *cs++ = MI_NOOP;
+       while (num_store_dw--) {
+               *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+               *cs++ = i915_scratch_offset(rq->i915);
+               *cs++ = 0;
+       }
+       *cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
+
        intel_ring_advance(rq, cs);
 
        return 0;