drm/i915: Make l3 remapping use the ring
authorBen Widawsky <benjamin.widawsky@intel.com>
Wed, 18 Sep 2013 04:12:44 +0000 (21:12 -0700)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Thu, 19 Sep 2013 18:38:00 +0000 (20:38 +0200)
Using LRI for setting the remapping registers allows us to stream l3
remapping information. This is necessary to handle per context remaps as
we'll see implemented in an upcoming patch.

Using the ring also means we don't need to frob the DOP clock gating
bits.

v2: Add comment about lack of worry for concurrent register access
(Daniel)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
[danvet: Bikeshed the comment a bit by doing a s/XXX/Note - there's
nothing to fix.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_sysfs.c

index c6e8df73756670ab25d575aa42d26fe63cfd5e54..0c39805b881ed75054f3064b3632f3a1efd69d65 100644 (file)
@@ -1949,7 +1949,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
-void i915_gem_l3_remap(struct drm_device *dev, int slice);
+int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
index 21a3d69679eeb418e27c847ad3d078dad3450549..e4f17e5947033251b835977a5f983fcd85255e74 100644 (file)
@@ -4222,35 +4222,35 @@ i915_gem_idle(struct drm_device *dev)
        return 0;
 }
 
-void i915_gem_l3_remap(struct drm_device *dev, int slice)
+int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
 {
+       struct drm_device *dev = ring->dev;
        drm_i915_private_t *dev_priv = dev->dev_private;
        u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
        u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
-       u32 misccpctl;
-       int i;
+       int i, ret;
 
        if (!HAS_L3_GPU_CACHE(dev) || !remap_info)
-               return;
+               return 0;
 
-       misccpctl = I915_READ(GEN7_MISCCPCTL);
-       I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
-       POSTING_READ(GEN7_MISCCPCTL);
+       ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
+       if (ret)
+               return ret;
 
+       /*
+        * Note: We do not worry about the concurrent register cacheline hang
+        * here because no other code should access these registers other than
+        * at initialization time.
+        */
        for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
-               u32 remap = I915_READ(reg_base + i);
-               if (remap && remap != remap_info[i/4])
-                       DRM_DEBUG("0x%x was already programmed to %x\n",
-                                 reg_base + i, remap);
-               if (remap && !remap_info[i/4])
-                       DRM_DEBUG_DRIVER("Clearing remapped register\n");
-               I915_WRITE(reg_base + i, remap_info[i/4]);
+               intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+               intel_ring_emit(ring, reg_base + i);
+               intel_ring_emit(ring, remap_info[i/4]);
        }
 
-       /* Make sure all the writes land before disabling dop clock gating */
-       POSTING_READ(reg_base);
+       intel_ring_advance(ring);
 
-       I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+       return ret;
 }
 
 void i915_gem_init_swizzling(struct drm_device *dev)
@@ -4361,15 +4361,15 @@ i915_gem_init_hw(struct drm_device *dev)
                I915_WRITE(GEN7_MSG_CTL, temp);
        }
 
-       for (i = 0; i < NUM_L3_SLICES(dev); i++)
-               i915_gem_l3_remap(dev, i);
-
        i915_gem_init_swizzling(dev);
 
        ret = i915_gem_init_rings(dev);
        if (ret)
                return ret;
 
+       for (i = 0; i < NUM_L3_SLICES(dev); i++)
+               i915_gem_l3_remap(&dev_priv->ring[RCS], i);
+
        /*
         * XXX: There was some w/a described somewhere suggesting loading
         * contexts before PPGTT.
index 3a8bf0c9b5ce8ea1e56cb62623dbdc8c5dfe5cd9..b07bdfb8892d87037f46ad9197d84e93cf0c4f1d 100644 (file)
@@ -204,7 +204,8 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 
        memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
 
-       i915_gem_l3_remap(drm_dev, slice);
+       if (i915_gem_l3_remap(&dev_priv->ring[RCS], slice))
+               count = 0;
 
        mutex_unlock(&drm_dev->struct_mutex);