drm/i915/skl+: Optimize WM calculation
authorKumar, Mahesh <mahesh1.kumar@intel.com>
Thu, 17 Aug 2017 13:45:23 +0000 (19:15 +0530)
committerMaarten Lankhorst <maarten.lankhorst@linux.intel.com>
Thu, 7 Sep 2017 11:34:05 +0000 (13:34 +0200)
Plane configuration parameters doesn't change for each WM-level
calculation. Currently we compute same parameters 8 times for each
wm-level.
This patch optimizes it by calculating these parameters in beginning
& reuse during each level-wm calculation.

Changes since V1:
 - rebase on top of Rodrigo's series for CNL

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170817134529.2839-3-mahesh1.kumar@intel.com
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/intel_pm.c

index 2f12373d2f547bcea7001a7dfa9cce15cabd4c1e..0df2b0f620f78c5607d47b65d89258998bbfcc06 100644 (file)
@@ -1837,6 +1837,20 @@ struct skl_wm_level {
        uint8_t plane_res_l;
 };
 
+/* Stores plane specific WM parameters */
+struct skl_wm_params {
+       bool x_tiled, y_tiled;
+       bool rc_surface;
+       uint32_t width;
+       uint8_t cpp;
+       uint32_t plane_pixel_rate;
+       uint32_t y_min_scanlines;
+       uint32_t plane_bytes_per_line;
+       uint_fixed_16_16_t plane_blocks_per_line;
+       uint_fixed_16_16_t y_tile_minimum;
+       uint32_t linetime_us;
+};
+
 /*
  * This struct helps tracking the state needed for runtime PM, which puts the
  * device in PCI D3 state. Notice that when this happens, nothing on the
index 0201816a422978d0e79086a7c916f5634953136b..2dd3af3debe97b25e6c83fe6e76dd1bad9525aba 100644 (file)
@@ -4376,134 +4376,146 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
                                            downscale_amount);
 }
 
-static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
-                               struct intel_crtc_state *cstate,
-                               const struct intel_plane_state *intel_pstate,
-                               uint16_t ddb_allocation,
-                               int level,
-                               uint16_t *out_blocks, /* out */
-                               uint8_t *out_lines, /* out */
-                               bool *enabled /* out */)
+static int
+skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
+                           struct intel_crtc_state *cstate,
+                           const struct intel_plane_state *intel_pstate,
+                           struct skl_wm_params *wp)
 {
        struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
        const struct drm_plane_state *pstate = &intel_pstate->base;
        const struct drm_framebuffer *fb = pstate->fb;
-       uint32_t latency = dev_priv->wm.skl_latency[level];
-       uint_fixed_16_16_t method1, method2;
-       uint_fixed_16_16_t plane_blocks_per_line;
-       uint_fixed_16_16_t selected_result;
        uint32_t interm_pbpl;
-       uint32_t plane_bytes_per_line;
-       uint32_t res_blocks, res_lines;
-       uint8_t cpp;
-       uint32_t width = 0;
-       uint32_t plane_pixel_rate;
-       uint_fixed_16_16_t y_tile_minimum;
-       uint32_t y_min_scanlines;
        struct intel_atomic_state *state =
                to_intel_atomic_state(cstate->base.state);
        bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
-       bool y_tiled, x_tiled;
 
-       if (latency == 0 ||
-           !intel_wm_plane_visible(cstate, intel_pstate)) {
-               *enabled = false;
+       if (!intel_wm_plane_visible(cstate, intel_pstate))
                return 0;
-       }
 
-       y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
-                 fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
-                 fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
-                 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
-       x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
-
-       /* Display WA #1141: kbl,cfl */
-       if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
-           dev_priv->ipc_enabled)
-               latency += 4;
-
-       if (apply_memory_bw_wa && x_tiled)
-               latency += 15;
+       wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
+                     fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
+                     fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
+                     fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
+       wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
+       wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
+                        fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
 
        if (plane->id == PLANE_CURSOR) {
-               width = intel_pstate->base.crtc_w;
+               wp->width = intel_pstate->base.crtc_w;
        } else {
                /*
                 * Src coordinates are already rotated by 270 degrees for
                 * the 90/270 degree plane rotation cases (to match the
                 * GTT mapping), hence no need to account for rotation here.
                 */
-               width = drm_rect_width(&intel_pstate->base.src) >> 16;
+               wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
        }
 
-       cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
-                                                       fb->format->cpp[0];
-       plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
+       wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
+                                                           fb->format->cpp[0];
+       wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
+                                                            intel_pstate);
 
        if (drm_rotation_90_or_270(pstate->rotation)) {
 
-               switch (cpp) {
+               switch (wp->cpp) {
                case 1:
-                       y_min_scanlines = 16;
+                       wp->y_min_scanlines = 16;
                        break;
                case 2:
-                       y_min_scanlines = 8;
+                       wp->y_min_scanlines = 8;
                        break;
                case 4:
-                       y_min_scanlines = 4;
+                       wp->y_min_scanlines = 4;
                        break;
                default:
-                       MISSING_CASE(cpp);
+                       MISSING_CASE(wp->cpp);
                        return -EINVAL;
                }
        } else {
-               y_min_scanlines = 4;
+               wp->y_min_scanlines = 4;
        }
 
        if (apply_memory_bw_wa)
-               y_min_scanlines *= 2;
+               wp->y_min_scanlines *= 2;
 
-       plane_bytes_per_line = width * cpp;
-       if (y_tiled) {
-               interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
-                                          y_min_scanlines, 512);
+       wp->plane_bytes_per_line = wp->width * wp->cpp;
+       if (wp->y_tiled) {
+               interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
+                                          wp->y_min_scanlines, 512);
 
                if (INTEL_GEN(dev_priv) >= 10)
                        interm_pbpl++;
 
-               plane_blocks_per_line = div_fixed16(interm_pbpl,
-                                                       y_min_scanlines);
-       } else if (x_tiled && INTEL_GEN(dev_priv) == 9) {
-               interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
-               plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
+               wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
+                                                       wp->y_min_scanlines);
+       } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
+               interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
+               wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
        } else {
-               interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
-               plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
+               interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
+               wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
        }
 
-       method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency);
-       method2 = skl_wm_method2(plane_pixel_rate,
+       wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
+                                            wp->plane_blocks_per_line);
+       wp->linetime_us = fixed16_to_u32_round_up(
+                                       intel_get_linetime_us(cstate));
+
+       return 0;
+}
+
+static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
+                               struct intel_crtc_state *cstate,
+                               const struct intel_plane_state *intel_pstate,
+                               uint16_t ddb_allocation,
+                               int level,
+                               const struct skl_wm_params *wp,
+                               uint16_t *out_blocks, /* out */
+                               uint8_t *out_lines, /* out */
+                               bool *enabled /* out */)
+{
+       const struct drm_plane_state *pstate = &intel_pstate->base;
+       uint32_t latency = dev_priv->wm.skl_latency[level];
+       uint_fixed_16_16_t method1, method2;
+       uint_fixed_16_16_t selected_result;
+       uint32_t res_blocks, res_lines;
+       struct intel_atomic_state *state =
+               to_intel_atomic_state(cstate->base.state);
+       bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
+
+       if (latency == 0 ||
+           !intel_wm_plane_visible(cstate, intel_pstate)) {
+               *enabled = false;
+               return 0;
+       }
+
+       /* Display WA #1141: kbl,cfl */
+       if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
+           dev_priv->ipc_enabled)
+               latency += 4;
+
+       if (apply_memory_bw_wa && wp->x_tiled)
+               latency += 15;
+
+       method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
+                                wp->cpp, latency);
+       method2 = skl_wm_method2(wp->plane_pixel_rate,
                                 cstate->base.adjusted_mode.crtc_htotal,
                                 latency,
-                                plane_blocks_per_line);
-
-       y_tile_minimum = mul_u32_fixed16(y_min_scanlines,
-                                        plane_blocks_per_line);
+                                wp->plane_blocks_per_line);
 
-       if (y_tiled) {
-               selected_result = max_fixed16(method2, y_tile_minimum);
+       if (wp->y_tiled) {
+               selected_result = max_fixed16(method2, wp->y_tile_minimum);
        } else {
-               uint32_t linetime_us;
-
-               linetime_us = fixed16_to_u32_round_up(
-                               intel_get_linetime_us(cstate));
-               if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
-                   (plane_bytes_per_line / 512 < 1))
+               if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
+                    512 < 1) && (wp->plane_bytes_per_line / 512 < 1))
                        selected_result = method2;
                else if (ddb_allocation >=
-                        fixed16_to_u32_round_up(plane_blocks_per_line))
+                        fixed16_to_u32_round_up(wp->plane_blocks_per_line))
                        selected_result = min_fixed16(method1, method2);
-               else if (latency >= linetime_us)
+               else if (latency >= wp->linetime_us)
                        selected_result = min_fixed16(method1, method2);
                else
                        selected_result = method1;
@@ -4511,19 +4523,18 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 
        res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
        res_lines = div_round_up_fixed16(selected_result,
-                                        plane_blocks_per_line);
+                                        wp->plane_blocks_per_line);
 
        /* Display WA #1125: skl,bxt,kbl,glk */
-       if (level == 0 &&
-           (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
-            fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS))
-               res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
+       if (level == 0 && wp->rc_surface)
+               res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);
 
        /* Display WA #1126: skl,bxt,kbl,glk */
        if (level >= 1 && level <= 7) {
-               if (y_tiled) {
-                       res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
-                       res_lines += y_min_scanlines;
+               if (wp->y_tiled) {
+                       res_blocks += fixed16_to_u32_round_up(
+                                                       wp->y_tile_minimum);
+                       res_lines += wp->y_min_scanlines;
                } else {
                        res_blocks++;
                }
@@ -4561,6 +4572,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
                      struct skl_ddb_allocation *ddb,
                      struct intel_crtc_state *cstate,
                      const struct intel_plane_state *intel_pstate,
+                     const struct skl_wm_params *wm_params,
                      struct skl_plane_wm *wm)
 {
        struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
@@ -4584,6 +4596,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
                                           intel_pstate,
                                           ddb_blocks,
                                           level,
+                                          wm_params,
                                           &result->plane_res_b,
                                           &result->plane_res_l,
                                           &result->plane_en);
@@ -4648,11 +4661,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
                const struct intel_plane_state *intel_pstate =
                                                to_intel_plane_state(pstate);
                enum plane_id plane_id = to_intel_plane(plane)->id;
+               struct skl_wm_params wm_params;
 
                wm = &pipe_wm->planes[plane_id];
+               memset(&wm_params, 0, sizeof(struct skl_wm_params));
+
+               ret = skl_compute_plane_wm_params(dev_priv, cstate,
+                                                 intel_pstate, &wm_params);
+               if (ret)
+                       return ret;
 
                ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
-                                           intel_pstate, wm);
+                                           intel_pstate, &wm_params, wm);
                if (ret)
                        return ret;
                skl_compute_transition_wm(cstate, &wm->trans_wm);