drm/i915: turbo & RC6 support for VLV v7
author: Jesse Barnes <jbarnes@virtuousgeek.org>
Wed, 17 Apr 2013 22:54:58 +0000 (15:54 -0700)
committer: Daniel Vetter <daniel.vetter@ffwll.ch>
Thu, 18 Apr 2013 08:15:43 +0000 (10:15 +0200)
Uses slightly different interfaces than other platforms.

v2: track actual set freq, not requested (Rohit)
    fix debug prints in init code (Jesse)
v3: don't write sleep reg (Jesse)
    re-add RC6 wake limit write (Ben)
    fixup thresholds to match other platforms (Ben)
    clean up mem freq calculation (Ben)
    clean up debug prints (Ben)
v4: move defines from punit patch (Ville)
v5: remove writes to nonexistent regs (Jesse)
    put RP and RC regs together (Jesse)
    fix RC6 enable (Jesse)
v6: use correct fuse reads from NC (Jesse)
    split out min/max funcs for use in sysfs (Jesse)
    add debugfs & sysfs freq controls (Jesse)
v7: update with Ben's hw_max changes (Jesse)

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net> (v6)
[danvet: Follow checkpatch suggestion to use min_t to avoid casting
fun.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_sysfs.c
drivers/gpu/drm/i915/intel_pm.c

index e913d325d5b808d184a82f760e90f7505cec251e..367b534d2260dc41c90e31e29d9e2daf089e331b 100644 (file)
@@ -941,7 +941,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
                           MEMSTAT_VID_SHIFT);
                seq_printf(m, "Current P-state: %d\n",
                           (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
-       } else if (IS_GEN6(dev) || IS_GEN7(dev)) {
+       } else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
                u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
                u32 rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS);
                u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
@@ -1009,6 +1009,25 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 
                seq_printf(m, "Max overclocked frequency: %dMHz\n",
                           dev_priv->rps.hw_max * GT_FREQUENCY_MULTIPLIER);
+       } else if (IS_VALLEYVIEW(dev)) {
+               u32 freq_sts, val;
+
+               valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS,
+                                     &freq_sts);
+               seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
+               seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
+
+               valleyview_punit_read(dev_priv, PUNIT_FUSE_BUS1, &val);
+               seq_printf(m, "max GPU freq: %d MHz\n",
+                          vlv_gpu_freq(dev_priv->mem_freq, val));
+
+               valleyview_punit_read(dev_priv, PUNIT_REG_GPU_LFM, &val);
+               seq_printf(m, "min GPU freq: %d MHz\n",
+                          vlv_gpu_freq(dev_priv->mem_freq, val));
+
+               seq_printf(m, "current GPU freq: %d MHz\n",
+                          vlv_gpu_freq(dev_priv->mem_freq,
+                                       (freq_sts >> 8) & 0xff));
        } else {
                seq_printf(m, "no P-state info available\n");
        }
@@ -1812,7 +1831,11 @@ i915_max_freq_get(void *data, u64 *val)
        if (ret)
                return ret;
 
-       *val = dev_priv->rps.max_delay * GT_FREQUENCY_MULTIPLIER;
+       if (IS_VALLEYVIEW(dev))
+               *val = vlv_gpu_freq(dev_priv->mem_freq,
+                                   dev_priv->rps.max_delay);
+       else
+               *val = dev_priv->rps.max_delay * GT_FREQUENCY_MULTIPLIER;
        mutex_unlock(&dev_priv->rps.hw_lock);
 
        return 0;
@@ -1837,9 +1860,16 @@ i915_max_freq_set(void *data, u64 val)
        /*
         * Turbo will still be enabled, but won't go above the set value.
         */
-       do_div(val, GT_FREQUENCY_MULTIPLIER);
-       dev_priv->rps.max_delay = val;
-       gen6_set_rps(dev, val);
+       if (IS_VALLEYVIEW(dev)) {
+               val = vlv_freq_opcode(dev_priv->mem_freq, val);
+               dev_priv->rps.max_delay = val;
+               gen6_set_rps(dev, val);
+       } else {
+               do_div(val, GT_FREQUENCY_MULTIPLIER);
+               dev_priv->rps.max_delay = val;
+               gen6_set_rps(dev, val);
+       }
+
        mutex_unlock(&dev_priv->rps.hw_lock);
 
        return 0;
@@ -1863,7 +1893,11 @@ i915_min_freq_get(void *data, u64 *val)
        if (ret)
                return ret;
 
-       *val = dev_priv->rps.min_delay * GT_FREQUENCY_MULTIPLIER;
+       if (IS_VALLEYVIEW(dev))
+               *val = vlv_gpu_freq(dev_priv->mem_freq,
+                                   dev_priv->rps.min_delay);
+       else
+               *val = dev_priv->rps.min_delay * GT_FREQUENCY_MULTIPLIER;
        mutex_unlock(&dev_priv->rps.hw_lock);
 
        return 0;
@@ -1888,9 +1922,15 @@ i915_min_freq_set(void *data, u64 val)
        /*
         * Turbo will still be enabled, but won't go below the set value.
         */
-       do_div(val, GT_FREQUENCY_MULTIPLIER);
-       dev_priv->rps.min_delay = val;
-       gen6_set_rps(dev, val);
+       if (IS_VALLEYVIEW(dev)) {
+               val = vlv_freq_opcode(dev_priv->mem_freq, val);
+               dev_priv->rps.min_delay = val;
+               valleyview_set_rps(dev, val);
+       } else {
+               do_div(val, GT_FREQUENCY_MULTIPLIER);
+               dev_priv->rps.min_delay = val;
+               gen6_set_rps(dev, val);
+       }
        mutex_unlock(&dev_priv->rps.hw_lock);
 
        return 0;
index abb065502deb939b3b6fbb0f17cf47f6fd921ce5..bd2d7f17393e431cdb64a501f628691d1131447f 100644 (file)
@@ -1856,6 +1856,9 @@ extern void intel_disable_fbc(struct drm_device *dev);
 extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
 extern void intel_init_pch_refclk(struct drm_device *dev);
 extern void gen6_set_rps(struct drm_device *dev, u8 val);
+extern void valleyview_set_rps(struct drm_device *dev, u8 val);
+extern int valleyview_rps_max_freq(struct drm_i915_private *dev_priv);
+extern int valleyview_rps_min_freq(struct drm_i915_private *dev_priv);
 extern void intel_detect_pch(struct drm_device *dev);
 extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
 extern int intel_enable_rc6(const struct drm_device *dev);
@@ -1887,6 +1890,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val);
 int valleyview_punit_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val);
 int valleyview_punit_write(struct drm_i915_private *dev_priv, u8 addr, u32 val);
+int valleyview_nc_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val);
+
 int vlv_gpu_freq(int ddr_freq, int val);
 int vlv_freq_opcode(int ddr_freq, int val);
 
index 3af983fad67c51e0c564f3102746ceb50a0c4b04..932e7f8b6d5c2c10321b6c355ccac52a9e4fc997 100644 (file)
@@ -482,7 +482,10 @@ static void gen6_pm_rps_work(struct work_struct *work)
         */
        if (!(new_delay > dev_priv->rps.max_delay ||
              new_delay < dev_priv->rps.min_delay)) {
-               gen6_set_rps(dev_priv->dev, new_delay);
+               if (IS_VALLEYVIEW(dev_priv->dev))
+                       valleyview_set_rps(dev_priv->dev, new_delay);
+               else
+                       gen6_set_rps(dev_priv->dev, new_delay);
        }
 
        mutex_unlock(&dev_priv->rps.hw_lock);
index 31de7e4b1f3e5f4181083cd5d37f2adc2369fb37..66fb8dd2822571d056e4dde14a6e8ca1d4630671 100644 (file)
 #define   GEN6_RC_CTL_RC6_ENABLE               (1<<18)
 #define   GEN6_RC_CTL_RC1e_ENABLE              (1<<20)
 #define   GEN6_RC_CTL_RC7_ENABLE               (1<<22)
+#define   GEN7_RC_CTL_TO_MODE                  (1<<28)
 #define   GEN6_RC_CTL_EI_MODE(x)               ((x)<<27)
 #define   GEN6_RC_CTL_HW_ENABLE                        (1<<31)
 #define GEN6_RP_DOWN_TIMEOUT                   0xA010
 #define   IOSF_BAR_SHIFT                       1
 #define   IOSF_SB_BUSY                         (1<<0)
 #define   IOSF_PORT_PUNIT                      0x4
+#define   IOSF_PORT_NC                         0x11
 #define VLV_IOSF_DATA                          0x182104
 #define VLV_IOSF_ADDR                          0x182108
 
 #define PUNIT_OPCODE_REG_READ                  6
 #define PUNIT_OPCODE_REG_WRITE                 7
 
+#define PUNIT_REG_GPU_LFM                      0xd3
+#define PUNIT_REG_GPU_FREQ_REQ                 0xd4
+#define PUNIT_REG_GPU_FREQ_STS                 0xd8
+#define PUNIT_REG_MEDIA_TURBO_FREQ_REQ         0xdc
+
+#define PUNIT_FUSE_BUS2                                0xf6 /* bits 47:40 */
+#define PUNIT_FUSE_BUS1                                0xf5 /* bits 55:48 */
+
+#define IOSF_NC_FB_GFX_FREQ_FUSE               0x1c
+#define   FB_GFX_MAX_FREQ_FUSE_SHIFT           3
+#define   FB_GFX_MAX_FREQ_FUSE_MASK            0x000007f8
+#define   FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT   11
+#define   FB_GFX_FGUARANTEED_FREQ_FUSE_MASK    0x0007f800
+#define IOSF_NC_FB_GFX_FMAX_FUSE_HI            0x34
+#define   FB_FMAX_VMIN_FREQ_HI_MASK            0x00000007
+#define IOSF_NC_FB_GFX_FMAX_FUSE_LO            0x30
+#define   FB_FMAX_VMIN_FREQ_LO_SHIFT           27
+#define   FB_FMAX_VMIN_FREQ_LO_MASK            0xf8000000
+
 #define GEN6_GT_CORE_STATUS            0x138060
 #define   GEN6_CORE_CPD_STATE_MASK     (7<<4)
 #define   GEN6_RCn_MASK                        7
index d5e1890678f9e61d551af21484dd2233fd8e817c..ca00df2de07b1156def566c48ffdc3fdd7a107db 100644 (file)
@@ -212,7 +212,10 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
        int ret;
 
        mutex_lock(&dev_priv->rps.hw_lock);
-       ret = dev_priv->rps.cur_delay * GT_FREQUENCY_MULTIPLIER;
+       if (IS_VALLEYVIEW(dev_priv->dev))
+               ret = vlv_gpu_freq(dev_priv->mem_freq, dev_priv->rps.cur_delay);
+       else
+               ret = dev_priv->rps.cur_delay * GT_FREQUENCY_MULTIPLIER;
        mutex_unlock(&dev_priv->rps.hw_lock);
 
        return snprintf(buf, PAGE_SIZE, "%d\n", ret);
@@ -226,7 +229,10 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute
        int ret;
 
        mutex_lock(&dev_priv->rps.hw_lock);
-       ret = dev_priv->rps.max_delay * GT_FREQUENCY_MULTIPLIER;
+       if (IS_VALLEYVIEW(dev_priv->dev))
+               ret = vlv_gpu_freq(dev_priv->mem_freq, dev_priv->rps.max_delay);
+       else
+               ret = dev_priv->rps.max_delay * GT_FREQUENCY_MULTIPLIER;
        mutex_unlock(&dev_priv->rps.hw_lock);
 
        return snprintf(buf, PAGE_SIZE, "%d\n", ret);
@@ -246,16 +252,25 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
        if (ret)
                return ret;
 
-       val /= GT_FREQUENCY_MULTIPLIER;
-
        mutex_lock(&dev_priv->rps.hw_lock);
 
-       rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-       hw_max = dev_priv->rps.hw_max;
-       non_oc_max = (rp_state_cap & 0xff);
-       hw_min = ((rp_state_cap & 0xff0000) >> 16);
+       if (IS_VALLEYVIEW(dev_priv->dev)) {
+               val = vlv_freq_opcode(dev_priv->mem_freq, val);
+
+               hw_max = valleyview_rps_max_freq(dev_priv);
+               hw_min = valleyview_rps_min_freq(dev_priv);
+               non_oc_max = hw_max;
+       } else {
+               val /= GT_FREQUENCY_MULTIPLIER;
 
-       if (val < hw_min || val > hw_max || val < dev_priv->rps.min_delay) {
+               rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+               hw_max = dev_priv->rps.hw_max;
+               non_oc_max = (rp_state_cap & 0xff);
+               hw_min = ((rp_state_cap & 0xff0000) >> 16);
+       }
+
+       if (val < hw_min || val > hw_max ||
+           val < dev_priv->rps.min_delay) {
                mutex_unlock(&dev_priv->rps.hw_lock);
                return -EINVAL;
        }
@@ -264,8 +279,12 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
                DRM_DEBUG("User requested overclocking to %d\n",
                          val * GT_FREQUENCY_MULTIPLIER);
 
-       if (dev_priv->rps.cur_delay > val)
-               gen6_set_rps(dev_priv->dev, val);
+       if (dev_priv->rps.cur_delay > val) {
+               if (IS_VALLEYVIEW(dev_priv->dev))
+                       valleyview_set_rps(dev_priv->dev, val);
+               else
+                       gen6_set_rps(dev_priv->dev, val);
+       }
 
        dev_priv->rps.max_delay = val;
 
@@ -282,7 +301,10 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute
        int ret;
 
        mutex_lock(&dev_priv->rps.hw_lock);
-       ret = dev_priv->rps.min_delay * GT_FREQUENCY_MULTIPLIER;
+       if (IS_VALLEYVIEW(dev_priv->dev))
+               ret = vlv_gpu_freq(dev_priv->mem_freq, dev_priv->rps.min_delay);
+       else
+               ret = dev_priv->rps.min_delay * GT_FREQUENCY_MULTIPLIER;
        mutex_unlock(&dev_priv->rps.hw_lock);
 
        return snprintf(buf, PAGE_SIZE, "%d\n", ret);
@@ -302,21 +324,32 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
        if (ret)
                return ret;
 
-       val /= GT_FREQUENCY_MULTIPLIER;
-
        mutex_lock(&dev_priv->rps.hw_lock);
 
-       rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-       hw_max = dev_priv->rps.hw_max;
-       hw_min = ((rp_state_cap & 0xff0000) >> 16);
+       if (IS_VALLEYVIEW(dev)) {
+               val = vlv_freq_opcode(dev_priv->mem_freq, val);
+
+               hw_max = valleyview_rps_max_freq(dev_priv);
+               hw_min = valleyview_rps_min_freq(dev_priv);
+       } else {
+               val /= GT_FREQUENCY_MULTIPLIER;
+
+               rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+               hw_max = dev_priv->rps.hw_max;
+               hw_min = ((rp_state_cap & 0xff0000) >> 16);
+       }
 
        if (val < hw_min || val > hw_max || val > dev_priv->rps.max_delay) {
                mutex_unlock(&dev_priv->rps.hw_lock);
                return -EINVAL;
        }
 
-       if (dev_priv->rps.cur_delay < val)
-               gen6_set_rps(dev_priv->dev, val);
+       if (dev_priv->rps.cur_delay < val) {
+               if (IS_VALLEYVIEW(dev))
+                       valleyview_set_rps(dev, val);
+               else
+                       gen6_set_rps(dev_priv->dev, val);
+       }
 
        dev_priv->rps.min_delay = val;
 
index f802368e8e9d5da0ede05feb0ae8683bef6a57bd..2557926553b3425b5956edb08a4804a83b1318f8 100644 (file)
@@ -2481,6 +2481,52 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
        trace_intel_gpu_freq_change(val * 50);
 }
 
+/*
+ * valleyview_set_rps - request a new GPU frequency from the Punit
+ * @dev: DRM device
+ * @val: requested frequency value, written to PUNIT_REG_GPU_FREQ_REQ
+ *
+ * The caller is expected to hold dev_priv->rps.hw_lock (checked with
+ * WARN_ON). Nothing is written when @val equals the current rps.cur_delay.
+ * Otherwise the request is written, PUNIT_REG_GPU_FREQ_STS is polled for
+ * about 10 ms until bit 0 reads back as zero, and the frequency field the
+ * Punit reports is stored in rps.cur_delay.
+ */
+void valleyview_set_rps(struct drm_device *dev, u8 val)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       unsigned long timeout = jiffies + msecs_to_jiffies(10);
+       u32 limits = gen6_rps_limits(dev_priv, &val);
+       /*
+        * The return value of valleyview_punit_read() is not checked below,
+        * so start from a defined value in case a read stores nothing.
+        */
+       u32 pval = 0;
+       u8 actual;
+
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+       WARN_ON(val > dev_priv->rps.max_delay);
+       WARN_ON(val < dev_priv->rps.min_delay);
+
+       DRM_DEBUG_DRIVER("gpu freq request from %d to %d\n",
+                        vlv_gpu_freq(dev_priv->mem_freq,
+                                     dev_priv->rps.cur_delay),
+                        vlv_gpu_freq(dev_priv->mem_freq, val));
+
+       if (val == dev_priv->rps.cur_delay)
+               return;
+
+       valleyview_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+
+       /* Poll the status register until bit 0 drops or the deadline passes */
+       do {
+               valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval);
+               if (time_after(jiffies, timeout)) {
+                       DRM_DEBUG_DRIVER("timed out waiting for Punit\n");
+                       break;
+               }
+               udelay(10);
+       } while (pval & 1);
+
+       valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval);
+
+       /*
+        * Extract the frequency as (status >> 8) & 0xff, the same way the
+        * other readers of this register do, so that any status bits above
+        * bit 15 cannot cause a false mismatch against the 8-bit request.
+        */
+       actual = (pval >> 8) & 0xff;
+       if (actual != val)
+               DRM_DEBUG_DRIVER("punit overrode freq: %d requested, but got %d\n",
+                         val, actual);
+
+       /* Make sure we continue to get interrupts
+        * until we hit the minimum or maximum frequencies.
+        */
+       I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits);
+
+       /* Track what the Punit actually reported, not what was requested */
+       dev_priv->rps.cur_delay = actual;
+
+       trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv->mem_freq, val));
+}
+
+
 static void gen6_disable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2742,6 +2788,127 @@ static void gen6_update_ring_freq(struct drm_device *dev)
        }
 }
 
+/*
+ * Read IOSF_NC_FB_GFX_FREQ_FUSE through valleyview_nc_read() and return its
+ * max-frequency field, capped at 0xea.
+ */
+int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
+{
+       u32 fuse;
+
+       valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE, &fuse);
+
+       /* Isolate the max-frequency field and move it down to bit 0 */
+       fuse &= FB_GFX_MAX_FREQ_FUSE_MASK;
+       fuse >>= FB_GFX_MAX_FREQ_FUSE_SHIFT;
+
+       /* Anything above 0xea is reported as 0xea */
+       return min_t(u32, fuse, 0xea);
+}
+
+/*
+ * Assemble the RPe value from two NC fuse registers: the masked top bits of
+ * IOSF_NC_FB_GFX_FMAX_FUSE_LO form the low part, and the masked low bits of
+ * IOSF_NC_FB_GFX_FMAX_FUSE_HI are placed above them, starting at bit 5.
+ */
+static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
+{
+       u32 fuse, rpe;
+
+       /* Low part first; the same scratch word is reused for the high read */
+       valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO, &fuse);
+       rpe = (fuse & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+
+       valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI, &fuse);
+       fuse &= FB_FMAX_VMIN_FREQ_HI_MASK;
+
+       return rpe | (fuse << 5);
+}
+
+/* Return the low byte of the Punit's PUNIT_REG_GPU_LFM register. */
+int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
+{
+       u32 lfm;
+
+       valleyview_punit_read(dev_priv, PUNIT_REG_GPU_LFM, &lfm);
+       lfm &= 0xff;
+
+       return lfm;
+}
+
+/*
+ * valleyview_enable_rps - program the RPS/RC6 registers and pick an initial
+ * GPU frequency on VLV
+ * @dev: DRM device
+ *
+ * Expects dev_priv->rps.hw_lock to be held (checked with WARN_ON) and takes
+ * a forcewake reference around the register programming. Fills in
+ * dev_priv->mem_freq and the rps cur_delay/max_delay/hw_max/min_delay values
+ * from Punit and fuse reads, requests the RPe frequency through
+ * valleyview_set_rps(), then enables the PM interrupts.
+ */
+static void valleyview_enable_rps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_ring_buffer *ring;
+       u32 gtfifodbg, val, rpe;
+       int i;
+
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+       /* Report any bits left set in GTFIFODBG and write them back */
+       gtfifodbg = I915_READ(GTFIFODBG);
+       if (gtfifodbg) {
+               DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
+               I915_WRITE(GTFIFODBG, gtfifodbg);
+       }
+
+       gen6_gt_force_wake_get(dev_priv);
+
+       I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+       I915_WRITE(GEN6_RP_UP_EI, 66000);
+       I915_WRITE(GEN6_RP_DOWN_EI, 350000);
+
+       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+       I915_WRITE(GEN6_RP_CONTROL,
+                  GEN6_RP_MEDIA_TURBO |
+                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                  GEN6_RP_MEDIA_IS_GFX |
+                  GEN6_RP_ENABLE |
+                  GEN6_RP_UP_BUSY_AVG |
+                  GEN6_RP_DOWN_IDLE_CONT);
+
+       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+       I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
+       I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
+
+       for_each_ring(ring, dev_priv, i)
+               I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
+
+       I915_WRITE(GEN6_RC6_THRESHOLD, 0xc350);
+
+       /* allows RC6 residency counter to work */
+       I915_WRITE(0x138104, _MASKED_BIT_ENABLE(0x3));
+       I915_WRITE(GEN6_RC_CONTROL,
+                  GEN7_RC_CTL_TO_MODE);
+
+       valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &val);
+       /*
+        * The DDR speed is derived from bits 7:6 of the status word, in
+        * 266 MHz steps above 800 MHz. The field has to be masked before it
+        * is scaled: "266 * (val >> 6) & 3" applies the mask to the product,
+        * which can only ever yield 0 or 2.
+        * NOTE(review): confirm the speed for field value 3 against the
+        * Punit documentation.
+        */
+       dev_priv->mem_freq = 800 + 266 * ((val >> 6) & 3);
+       DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
+
+       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no");
+       DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+       DRM_DEBUG_DRIVER("current GPU freq: %d\n",
+                        vlv_gpu_freq(dev_priv->mem_freq, (val >> 8) & 0xff));
+       dev_priv->rps.cur_delay = (val >> 8) & 0xff;
+
+       dev_priv->rps.max_delay = valleyview_rps_max_freq(dev_priv);
+       dev_priv->rps.hw_max = dev_priv->rps.max_delay;
+       DRM_DEBUG_DRIVER("max GPU freq: %d\n", vlv_gpu_freq(dev_priv->mem_freq,
+                                                    dev_priv->rps.max_delay));
+
+       rpe = valleyview_rps_rpe_freq(dev_priv);
+       DRM_DEBUG_DRIVER("RPe GPU freq: %d\n",
+                        vlv_gpu_freq(dev_priv->mem_freq, rpe));
+
+       val = valleyview_rps_min_freq(dev_priv);
+       DRM_DEBUG_DRIVER("min GPU freq: %d\n", vlv_gpu_freq(dev_priv->mem_freq,
+                                                           val));
+       dev_priv->rps.min_delay = val;
+
+       DRM_DEBUG_DRIVER("setting GPU freq to %d\n",
+                        vlv_gpu_freq(dev_priv->mem_freq, rpe));
+
+       /* min/max must be set before this call; valleyview_set_rps checks them */
+       valleyview_set_rps(dev_priv->dev, rpe);
+
+       /* requires MSI enabled */
+       I915_WRITE(GEN6_PMIER, GEN6_PM_DEFERRED_EVENTS);
+       spin_lock_irq(&dev_priv->rps.lock);
+       WARN_ON(dev_priv->rps.pm_iir != 0);
+       I915_WRITE(GEN6_PMIMR, 0);
+       spin_unlock_irq(&dev_priv->rps.lock);
+       /* enable all PM interrupts */
+       I915_WRITE(GEN6_PMINTRMSK, 0);
+
+       gen6_gt_force_wake_put(dev_priv);
+}
+
 void ironlake_teardown_rc6(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3468,7 +3635,7 @@ void intel_disable_gt_powersave(struct drm_device *dev)
        if (IS_IRONLAKE_M(dev)) {
                ironlake_disable_drps(dev);
                ironlake_disable_rc6(dev);
-       } else if (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) {
+       } else if (INTEL_INFO(dev)->gen >= 6) {
                cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
                mutex_lock(&dev_priv->rps.hw_lock);
                gen6_disable_rps(dev);
@@ -3484,8 +3651,13 @@ static void intel_gen6_powersave_work(struct work_struct *work)
        struct drm_device *dev = dev_priv->dev;
 
        mutex_lock(&dev_priv->rps.hw_lock);
-       gen6_enable_rps(dev);
-       gen6_update_ring_freq(dev);
+
+       if (IS_VALLEYVIEW(dev)) {
+               valleyview_enable_rps(dev);
+       } else {
+               gen6_enable_rps(dev);
+               gen6_update_ring_freq(dev);
+       }
        mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
@@ -3497,7 +3669,7 @@ void intel_enable_gt_powersave(struct drm_device *dev)
                ironlake_enable_drps(dev);
                ironlake_enable_rc6(dev);
                intel_init_emon(dev);
-       } else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
+       } else if (IS_GEN6(dev) || IS_GEN7(dev)) {
                /*
                 * PCU communication is slow and this doesn't need to be
                 * done at any specific time, so do this out of our fast path
@@ -4568,14 +4740,13 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
        return 0;
 }
 
-static int vlv_punit_rw(struct drm_i915_private *dev_priv, u8 opcode,
+static int vlv_punit_rw(struct drm_i915_private *dev_priv, u32 port, u8 opcode,
                        u8 addr, u32 *val)
 {
-       u32 cmd, devfn, port, be, bar;
+       u32 cmd, devfn, be, bar;
 
        bar = 0;
        be = 0xf;
-       port = IOSF_PORT_PUNIT;
        devfn = PCI_DEVFN(2, 0);
 
        cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
@@ -4597,7 +4768,7 @@ static int vlv_punit_rw(struct drm_i915_private *dev_priv, u8 opcode,
        I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd);
 
        if (wait_for((I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) == 0,
-                    500)) {
+                    5)) {
                DRM_ERROR("timeout waiting for pcode %s (%d) to finish\n",
                          opcode == PUNIT_OPCODE_REG_READ ? "read" : "write",
                          addr);
@@ -4613,12 +4784,20 @@ static int vlv_punit_rw(struct drm_i915_private *dev_priv, u8 opcode,
 
 int valleyview_punit_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val)
 {
-       return vlv_punit_rw(dev_priv, PUNIT_OPCODE_REG_READ, addr, val);
+       return vlv_punit_rw(dev_priv, IOSF_PORT_PUNIT, PUNIT_OPCODE_REG_READ,
+                           addr, val);
 }
 
 int valleyview_punit_write(struct drm_i915_private *dev_priv, u8 addr, u32 val)
 {
-       return vlv_punit_rw(dev_priv, PUNIT_OPCODE_REG_WRITE, addr, &val);
+       return vlv_punit_rw(dev_priv, IOSF_PORT_PUNIT, PUNIT_OPCODE_REG_WRITE,
+                           addr, &val);
+}
+
+/*
+ * Issue a register read for @addr on the IOSF NC port; @val is handed to
+ * vlv_punit_rw() and its return code is passed back unchanged.
+ */
+int valleyview_nc_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val)
+{
+       const u32 port = IOSF_PORT_NC;
+
+       return vlv_punit_rw(dev_priv, port, PUNIT_OPCODE_REG_READ, addr, val);
 }
 
 int vlv_gpu_freq(int ddr_freq, int val)