drm/i915: Flush TLBs after !RCS PP_DIR_BASE
authorBen Widawsky <benjamin.widawsky@intel.com>
Fri, 6 Dec 2013 22:11:12 +0000 (14:11 -0800)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Wed, 18 Dec 2013 14:29:13 +0000 (15:29 +0100)
I've found this by accident. The docs don't really come out and say you
need to do this. What the docs do tell you is you need to flush the TLBs
before you set the PP_DIR_BASE, and that the RCS will invalidate its
TLBs upon setting the new PP_DIR_BASE. It makes no such comment about
any of the other rings.

Empirically, this indeed fixes a really obvious bug whereby the batches
being sent to the blitter were not executing (we were executing the
HSWP somehow instead).

NOTE: This should make no difference with the current code. It only
applies when we start using multiple VMs.

NOTE2: HSW appears to be immune to this.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_gem_gtt.c

index 08a706d0a737b5b0f1a1358887167c20131188cc..0218e34f178a9de7e35702d8414f51856a792131 100644 (file)
@@ -486,6 +486,50 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
        return (ppgtt->pd_offset / 64) << 16;
 }
 
+static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
+                        struct intel_ring_buffer *ring,
+                        bool synchronous)
+{
+       struct drm_device *dev = ppgtt->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int ret;
+
+       /* If we're in reset, we can assume the GPU is sufficiently idle to
+        * manually frob these bits. Ideally we could use the ring functions,
+        * except our error handling makes it quite difficult (can't use
+        * intel_ring_begin, ring->flush, or intel_ring_advance)
+        *
+        * FIXME: We should try not to special case reset
+        */
+       if (synchronous ||
+           i915_reset_in_progress(&dev_priv->gpu_error)) {
+               WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
+               I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
+               I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
+               POSTING_READ(RING_PP_DIR_BASE(ring));
+               return 0;
+       }
+
+       /* NB: TLBs must be flushed and invalidated before a switch */
+       ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+       if (ret)
+               return ret;
+
+       ret = intel_ring_begin(ring, 6);
+       if (ret)
+               return ret;
+
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
+       intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+       intel_ring_emit(ring, PP_DIR_DCLV_2G);
+       intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+       intel_ring_emit(ring, get_pd_offset(ppgtt));
+       intel_ring_emit(ring, MI_NOOP);
+       intel_ring_advance(ring);
+
+       return 0;
+}
+
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
                          struct intel_ring_buffer *ring,
                          bool synchronous)
@@ -527,6 +571,13 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);
 
+       /* XXX: RCS is the only one to auto invalidate the TLBs? */
+       if (ring->id != RCS) {
+               ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+               if (ret)
+                       return ret;
+       }
+
        return 0;
 }
 
@@ -762,6 +813,9 @@ alloc:
        if (IS_GEN6(dev)) {
                ppgtt->enable = gen6_ppgtt_enable;
                ppgtt->switch_mm = gen6_mm_switch;
+       } else if (IS_HASWELL(dev)) {
+               ppgtt->enable = gen7_ppgtt_enable;
+               ppgtt->switch_mm = hsw_mm_switch;
        } else if (IS_GEN7(dev)) {
                ppgtt->enable = gen7_ppgtt_enable;
                ppgtt->switch_mm = gen7_mm_switch;