drm/i915: Interactive RPS mode
authorChris Wilson <chris@chris-wilson.co.uk>
Tue, 31 Jul 2018 13:26:29 +0000 (14:26 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Tue, 31 Jul 2018 14:50:30 +0000 (15:50 +0100)
RPS provides a feedback loop where we use the load during the previous
evaluation interval to decide whether to up or down clock the GPU
frequency. Our responsiveness is split into 3 regimes, a high and low
plateau with the intent to keep the gpu clocked high to cover occasional
stalls under high load, and low despite occasional glitches under steady
low load, and in between. However, we run into situations like kodi where
we want to stay at low power (video decoding is done efficiently
inside the fixed function HW and doesn't need high clocks even for high
bitrate streams), but just occasionally the pipeline is more complex
than a video decode and we need a smidgen of extra GPU power to present
on time. In the high power regime, we sample at sub frame intervals with
a bias to upclocking, and conversely at low power we sample over a few
frames worth to provide what we consider to be the right levels of
responsiveness respectively. At low power, we more or less expect to be
kicked out to high power at the start of a busy sequence by waitboosting.

Prior to commit e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active
request") whenever we missed the frame or stalled, we would immediately go
full throttle and upclock the GPU to max. But in commit e9af4ea2b9e7, we
relaxed the waitboosting to only apply if the pipeline was deep to avoid
over-committing resources for a near miss. Sadly though, a near miss is
still a miss, and perceptible as jitter in the frame delivery.

To try and prevent the near miss before having to resort to boosting
after the fact, we use the pageflip queue as an indication that we are
in an "interactive" regime and so should sample the load more frequently
to provide power before the frame misses its vblank. This will make us
more favorable to providing a small power increase (one or two bins) as
required rather than going all the way to maximum and then having to
work back down again. (We still keep the waitboosting mechanism around
just in case a dramatic change in system load requires urgent upclocking,
faster than we can provide in a few evaluation intervals.)

v2: Reduce rps_set_interactive to a boolean parameter to avoid the
confusion of what if they wanted a new power mode after pinning to a
different mode (which to choose?)
v3: Only reprogram RPS while the GT is awake, it will be set when we
wake the GT, and while off warns about being used outside of rpm.
v4: Fix deferred application of interactive mode
v5: s/state/interactive/
v6: Group the mutex with its principal in a substruct

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107111
Fixes: e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active request")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180731132629.3381-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_pm.c

index 59dc0610ea44fb89c72a59ba2b8dcb0738059897..f9ce35da4123ec52657f55f6a704c12c9c286080 100644 (file)
@@ -1218,7 +1218,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
                           rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup));
                seq_printf(m, "RP PREV UP: %d (%dus)\n",
                           rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup));
-               seq_printf(m, "Up threshold: %d%%\n", rps->up_threshold);
+               seq_printf(m, "Up threshold: %d%%\n",
+                          rps->power.up_threshold);
 
                seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
                           rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei));
@@ -1226,7 +1227,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
                           rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown));
                seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
                           rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown));
-               seq_printf(m, "Down threshold: %d%%\n", rps->down_threshold);
+               seq_printf(m, "Down threshold: %d%%\n",
+                          rps->power.down_threshold);
 
                max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 :
                            rp_state_cap >> 16) & 0xff;
@@ -2218,6 +2220,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
        seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
        seq_printf(m, "Boosts outstanding? %d\n",
                   atomic_read(&rps->num_waiters));
+       seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
        seq_printf(m, "Frequency requested %d\n",
                   intel_gpu_freq(dev_priv, rps->cur_freq));
        seq_printf(m, "  min hard:%d, soft:%d; max soft:%d, hard:%d\n",
@@ -2261,13 +2264,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
                intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
                seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
-                          rps_power_to_str(rps->power));
+                          rps_power_to_str(rps->power.mode));
                seq_printf(m, "  Avg. up: %d%% [above threshold? %d%%]\n",
                           rpup && rpupei ? 100 * rpup / rpupei : 0,
-                          rps->up_threshold);
+                          rps->power.up_threshold);
                seq_printf(m, "  Avg. down: %d%% [below threshold? %d%%]\n",
                           rpdown && rpdownei ? 100 * rpdown / rpdownei : 0,
-                          rps->down_threshold);
+                          rps->power.down_threshold);
        } else {
                seq_puts(m, "\nRPS Autotuning inactive\n");
        }
index 0f49f9988dfae8ff2c59a24e30b8ee9ad71a9f8d..4aca5344863d6fc013470b41a706c4e7bd18d567 100644 (file)
@@ -779,11 +779,17 @@ struct intel_rps {
        u8 rp0_freq;            /* Non-overclocked max frequency. */
        u16 gpll_ref_freq;      /* vlv/chv GPLL reference frequency */
 
-       u8 up_threshold; /* Current %busy required to uplock */
-       u8 down_threshold; /* Current %busy required to downclock */
-
        int last_adj;
-       enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
+
+       struct {
+               struct mutex mutex;
+
+               enum { LOW_POWER, BETWEEN, HIGH_POWER } mode;
+               unsigned int interactive;
+
+               u8 up_threshold; /* Current %busy required to uplock */
+               u8 down_threshold; /* Current %busy required to downclock */
+       } power;
 
        bool enabled;
        atomic_t num_waiters;
@@ -3422,6 +3428,8 @@ extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
 extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
 extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
 extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
+extern void intel_rps_mark_interactive(struct drm_i915_private *i915,
+                                      bool interactive);
 extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
                                  bool enable);
 
index 5dadefca2ad22a77dfb64d2274a50f93f5a2e02a..90628a47ae17f81312dff51ddbc89aff4af55654 100644 (file)
@@ -1265,9 +1265,9 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
                c0 = max(render, media);
                c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
 
-               if (c0 > time * rps->up_threshold)
+               if (c0 > time * rps->power.up_threshold)
                        events = GEN6_PM_RP_UP_THRESHOLD;
-               else if (c0 < time * rps->down_threshold)
+               else if (c0 < time * rps->power.down_threshold)
                        events = GEN6_PM_RP_DOWN_THRESHOLD;
        }
 
index 577b30dde45b3e40f5a6e82291fd877b5ed4a4e2..73c6d56ba3ec980c7a2d38c404d45c92c4f7c2f7 100644 (file)
@@ -13104,6 +13104,19 @@ intel_prepare_plane_fb(struct drm_plane *plane,
                add_rps_boost_after_vblank(new_state->crtc, new_state->fence);
        }
 
+       /*
+        * We declare pageflips to be interactive and so merit a small bias
+        * towards upclocking to deliver the frame on time. By only changing
+        * the RPS thresholds to sample more regularly and aim for higher
+        * clocks we can hopefully deliver low power workloads (like kodi)
+        * that are not quite steady state without resorting to forcing
+        * maximum clocks following a vblank miss (see do_rps_boost()).
+        */
+       if (!intel_state->rps_interactive) {
+               intel_rps_mark_interactive(dev_priv, true);
+               intel_state->rps_interactive = true;
+       }
+
        return 0;
 }
 
@@ -13120,8 +13133,15 @@ void
 intel_cleanup_plane_fb(struct drm_plane *plane,
                       struct drm_plane_state *old_state)
 {
+       struct intel_atomic_state *intel_state =
+               to_intel_atomic_state(old_state->state);
        struct drm_i915_private *dev_priv = to_i915(plane->dev);
 
+       if (intel_state->rps_interactive) {
+               intel_rps_mark_interactive(dev_priv, false);
+               intel_state->rps_interactive = false;
+       }
+
        /* Should only be called after a successful intel_prepare_plane_fb()! */
        mutex_lock(&dev_priv->drm.struct_mutex);
        intel_plane_unpin_fb(to_intel_plane_state(old_state));
index 99a5f5be5b82bce22c7f16b7514977e980e6bf5f..1ad7c1124befe034d63c55ba9b312480061b199e 100644 (file)
@@ -484,6 +484,8 @@ struct intel_atomic_state {
         */
        bool skip_intermediate_wm;
 
+       bool rps_interactive;
+
        /* Gen9+ only */
        struct skl_ddb_values wm_results;
 
index 8a4152244571f316081dd3982dcbd81abe9c2bf7..2531eb75bdceef43b390cd8824890ccff63eb2be 100644 (file)
@@ -6256,42 +6256,15 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
        return limits;
 }
 
-static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
 {
        struct intel_rps *rps = &dev_priv->gt_pm.rps;
-       int new_power;
        u32 threshold_up = 0, threshold_down = 0; /* in % */
        u32 ei_up = 0, ei_down = 0;
 
-       new_power = rps->power;
-       switch (rps->power) {
-       case LOW_POWER:
-               if (val > rps->efficient_freq + 1 &&
-                   val > rps->cur_freq)
-                       new_power = BETWEEN;
-               break;
-
-       case BETWEEN:
-               if (val <= rps->efficient_freq &&
-                   val < rps->cur_freq)
-                       new_power = LOW_POWER;
-               else if (val >= rps->rp0_freq &&
-                        val > rps->cur_freq)
-                       new_power = HIGH_POWER;
-               break;
+       lockdep_assert_held(&rps->power.mutex);
 
-       case HIGH_POWER:
-               if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
-                   val < rps->cur_freq)
-                       new_power = BETWEEN;
-               break;
-       }
-       /* Max/min bins are special */
-       if (val <= rps->min_freq_softlimit)
-               new_power = LOW_POWER;
-       if (val >= rps->max_freq_softlimit)
-               new_power = HIGH_POWER;
-       if (new_power == rps->power)
+       if (new_power == rps->power.mode)
                return;
 
        /* Note the units here are not exactly 1us, but 1280ns. */
@@ -6354,12 +6327,71 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
                   GEN6_RP_DOWN_IDLE_AVG);
 
 skip_hw_write:
-       rps->power = new_power;
-       rps->up_threshold = threshold_up;
-       rps->down_threshold = threshold_down;
+       rps->power.mode = new_power;
+       rps->power.up_threshold = threshold_up;
+       rps->power.down_threshold = threshold_down;
+}
+
+static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+{
+       struct intel_rps *rps = &dev_priv->gt_pm.rps;
+       int new_power;
+
+       new_power = rps->power.mode;
+       switch (rps->power.mode) {
+       case LOW_POWER:
+               if (val > rps->efficient_freq + 1 &&
+                   val > rps->cur_freq)
+                       new_power = BETWEEN;
+               break;
+
+       case BETWEEN:
+               if (val <= rps->efficient_freq &&
+                   val < rps->cur_freq)
+                       new_power = LOW_POWER;
+               else if (val >= rps->rp0_freq &&
+                        val > rps->cur_freq)
+                       new_power = HIGH_POWER;
+               break;
+
+       case HIGH_POWER:
+               if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+                   val < rps->cur_freq)
+                       new_power = BETWEEN;
+               break;
+       }
+       /* Max/min bins are special */
+       if (val <= rps->min_freq_softlimit)
+               new_power = LOW_POWER;
+       if (val >= rps->max_freq_softlimit)
+               new_power = HIGH_POWER;
+
+       mutex_lock(&rps->power.mutex);
+       if (rps->power.interactive)
+               new_power = HIGH_POWER;
+       rps_set_power(dev_priv, new_power);
+       mutex_unlock(&rps->power.mutex);
        rps->last_adj = 0;
 }
 
+void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
+{
+       struct intel_rps *rps = &i915->gt_pm.rps;
+
+       if (INTEL_GEN(i915) < 6)
+               return;
+
+       mutex_lock(&rps->power.mutex);
+       if (interactive) {
+               if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
+                       rps_set_power(i915, HIGH_POWER);
+       } else {
+               GEM_BUG_ON(!rps->power.interactive);
+               rps->power.interactive--;
+       }
+       mutex_unlock(&rps->power.mutex);
+}
+
 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
 {
        struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -6772,7 +6804,7 @@ static void reset_rps(struct drm_i915_private *dev_priv,
        u8 freq = rps->cur_freq;
 
        /* force a reset */
-       rps->power = -1;
+       rps->power.mode = -1;
        rps->cur_freq = -1;
 
        if (set(dev_priv, freq))
@@ -9596,6 +9628,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
        mutex_init(&dev_priv->pcu_lock);
+       mutex_init(&dev_priv->gt_pm.rps.power.mutex);
 
        atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);