From d8e874980241dd1cee68e85c39d8e5372bbad494 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 11 Dec 2018 09:31:07 -0800 Subject: [PATCH] drm/i915: Switch to level-based DDB allocation algorithm (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The DDB allocation algorithm currently used by the driver grants each plane a very small minimum allocation of DDB blocks and then divies up all of the remaining blocks based on the percentage of the total data rate that the plane makes up. It turns out that this proportional allocation approach is overly-generous with the larger planes and can leave very small planes wthout a big enough allocation to even hit their level 0 watermark requirements (especially on APL, which has a smaller DDB in general than other gen9 platforms). Or there can be situations where the smallest planes hit a lower watermark level than they should have been able to hit with a more equitable division of DDB blocks, thus limiting the overall system sleep state that can be achieved. The bspec now describes an alternate algorithm that can be used to overcome these types of issues. With the new algorithm, we calculate all plane watermark values for all wm levels first, then go back and partition a pipe's DDB space second. The DDB allocation will calculate what the highest watermark level that can be achieved on *all* active planes, and then grant the blocks necessary to hit that level to each plane. Any remaining blocks are then divided up proportionally according to data rate, similar to the old algorithm. There was a previous attempt to implement this algorithm a couple years ago in bb9d85f6e9d ("drm/i915/skl: New ddb allocation algorithm"), but some regressions were reported, the patch was reverted, and nobody ever got around to figuring out exactly where the bug was in that version. Our watermark code has evolved significantly in the meantime, but we're still getting bug reports caused by the unfair proportional algorithm, so let's give this another shot. v2: - Make sure cursor allocation stays constant and fixed at the end of the pipe allocation. - Fix some watermark level iterators that weren't handling the max level. v3: - Ensure we don't leave any DDB blocks unused by using DIV_ROUND_UP+min to calculate the extra blocks for each plane. (Ville) - Replace a while() loop with a for() loop to be more consistent with surrounding code. (Ville) - Clean unattainable watermark levels with memset rather than directly clearing the member fields. Also do the same for the transition watermark values if they can't be achieved. (Ville) - Drop min_disp_buf_needed calculations in skl_compute_plane_wm() since the results are no longer needed or used. (Ville) - Drop skl_latency[0] != 0 sanity check; both watermark methods already account for an invalid 0 latency by returning FP_16_16_MAX. (Ville) v4: - Break DDB allocation loop when total_data_rate=0 rather than alloc_size=0. If total_data_rate has dropped to 0, all remaining planes are disabled, which isn't true for alloc_size (we might just have not had any remaining blocks to hand out). Plus total_data_rate=0 is the case we need to avoid to a prevent a div-by-0. (Ville) - s/DIV_ROUND_UP/DIV64_U64_ROUND_UP/ to prevent 32-bit breakage (Ville) v5: - Don't forget to move 'start' pointer forward for UV surface when setting plane DDB boundaries. (Ville) Cc: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105458 Signed-off-by: Matt Roper Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181211173107.11068-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 383 ++++++++++++-------------------- 1 file changed, 140 insertions(+), 243 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bf970cf7b8a5..6d074f2e69d3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4301,102 +4301,6 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, return total_data_rate; } -static uint16_t -skl_ddb_min_alloc(const struct intel_plane_state *plane_state, const int plane) -{ - struct drm_framebuffer *fb = plane_state->base.fb; - uint32_t src_w, src_h; - uint32_t min_scanlines = 8; - uint8_t plane_bpp; - - if (WARN_ON(!fb)) - return 0; - - /* For packed formats, and uv-plane, return 0 */ - if (plane == 1 && fb->format->format != DRM_FORMAT_NV12) - return 0; - - /* For Non Y-tile return 8-blocks */ - if (fb->modifier != I915_FORMAT_MOD_Y_TILED && - fb->modifier != I915_FORMAT_MOD_Yf_TILED && - fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS && - fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS) - return 8; - - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. - */ - src_w = drm_rect_width(&plane_state->base.src) >> 16; - src_h = drm_rect_height(&plane_state->base.src) >> 16; - - /* Halve UV plane width and height for NV12 */ - if (plane == 1) { - src_w /= 2; - src_h /= 2; - } - - plane_bpp = fb->format->cpp[plane]; - - if (drm_rotation_90_or_270(plane_state->base.rotation)) { - switch (plane_bpp) { - case 1: - min_scanlines = 32; - break; - case 2: - min_scanlines = 16; - break; - case 4: - min_scanlines = 8; - break; - case 8: - min_scanlines = 4; - break; - default: - WARN(1, "Unsupported pixel depth %u for rotation", - plane_bpp); - min_scanlines = 32; - } - } - - return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3; -} - -static void -skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, - uint16_t *minimum, uint16_t *uv_minimum) -{ - const struct drm_plane_state *pstate; - struct drm_plane *plane; - - drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) { - enum plane_id plane_id = to_intel_plane(plane)->id; - struct intel_plane_state *plane_state = to_intel_plane_state(pstate); - - if (plane_id == PLANE_CURSOR) - continue; - - /* slave plane must be invisible and calculated from master */ - if (!pstate->visible || WARN_ON(plane_state->slave)) - continue; - - if (!plane_state->linked_plane) { - minimum[plane_id] = skl_ddb_min_alloc(plane_state, 0); - uv_minimum[plane_id] = - skl_ddb_min_alloc(plane_state, 1); - } else { - enum plane_id y_plane_id = - plane_state->linked_plane->id; - - minimum[y_plane_id] = skl_ddb_min_alloc(plane_state, 0); - minimum[plane_id] = skl_ddb_min_alloc(plane_state, 1); - } - } - - minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active); -} - static int skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct skl_ddb_allocation *ddb /* out */) @@ -4406,15 +4310,17 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; - uint16_t alloc_size, start; - uint16_t minimum[I915_MAX_PLANES] = {}; - uint16_t uv_minimum[I915_MAX_PLANES] = {}; + struct skl_plane_wm *wm; + uint16_t alloc_size, start = 0; + uint16_t total[I915_MAX_PLANES] = {}; + uint16_t uv_total[I915_MAX_PLANES] = {}; u64 total_data_rate; enum plane_id plane_id; int num_active; u64 plane_data_rate[I915_MAX_PLANES] = {}; u64 uv_plane_data_rate[I915_MAX_PLANES] = {}; - uint16_t total_min_blocks = 0; + uint16_t blocks = 0; + int level; /* Clear the partitioning for disabled planes. */ memset(cstate->wm.skl.plane_ddb_y, 0, sizeof(cstate->wm.skl.plane_ddb_y)); @@ -4444,81 +4350,134 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, if (alloc_size == 0) return 0; - skl_ddb_calc_min(cstate, num_active, minimum, uv_minimum); + /* Allocate fixed number of blocks for cursor. */ + total[PLANE_CURSOR] = skl_cursor_allocation(num_active); + alloc_size -= total[PLANE_CURSOR]; + cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start = + alloc->end - total[PLANE_CURSOR]; + cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end; + + if (total_data_rate == 0) + return 0; /* - * 1. Allocate the mininum required blocks for each active plane - * and allocate the cursor, it doesn't require extra allocation - * proportional to the data rate. + * Find the highest watermark level for which we can satisfy the block + * requirement of active planes. */ + for (level = ilk_wm_max_level(dev_priv); level >= 0; level--) { + for_each_plane_id_on_crtc(intel_crtc, plane_id) { + if (plane_id == PLANE_CURSOR) + continue; - for_each_plane_id_on_crtc(intel_crtc, plane_id) { - total_min_blocks += minimum[plane_id]; - total_min_blocks += uv_minimum[plane_id]; + wm = &cstate->wm.skl.optimal.planes[plane_id]; + blocks += wm->wm[level].plane_res_b; + blocks += wm->uv_wm[level].plane_res_b; + } + + if (blocks < alloc_size) { + alloc_size -= blocks; + break; + } } - if (total_min_blocks > alloc_size) { + if (level < 0) { DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations"); - DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks, - alloc_size); + DRM_DEBUG_KMS("minimum required %d/%d\n", blocks, + alloc_size); return -EINVAL; } - alloc_size -= total_min_blocks; - cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; - cstate->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end; - /* - * 2. Distribute the remaining space in proportion to the amount of - * data each plane needs to fetch from memory. - * - * FIXME: we may not allocate every single block here. + * Grant each plane the blocks it requires at the highest achievable + * watermark level, plus an extra share of the leftover blocks + * proportional to its relative data rate. */ - if (total_data_rate == 0) - return 0; - - start = alloc->start; for_each_plane_id_on_crtc(intel_crtc, plane_id) { - u64 data_rate, uv_data_rate; - uint16_t plane_blocks, uv_plane_blocks; + u64 rate; + u16 extra; if (plane_id == PLANE_CURSOR) continue; - data_rate = plane_data_rate[plane_id]; - /* - * allocation for (packed formats) or (uv-plane part of planar format): - * promote the expression to 64 bits to avoid overflowing, the - * result is < available as data_rate / total_data_rate < 1 + * We've accounted for all active planes; remaining planes are + * all disabled. */ - plane_blocks = minimum[plane_id]; - plane_blocks += div64_u64(alloc_size * data_rate, total_data_rate); + if (total_data_rate == 0) + break; - /* Leave disabled planes at (0,0) */ - if (data_rate) { - cstate->wm.skl.plane_ddb_y[plane_id].start = start; - cstate->wm.skl.plane_ddb_y[plane_id].end = start + plane_blocks; - } + wm = &cstate->wm.skl.optimal.planes[plane_id]; - start += plane_blocks; + rate = plane_data_rate[plane_id]; + extra = min_t(u16, alloc_size, + DIV64_U64_ROUND_UP(alloc_size * rate, + total_data_rate)); + total[plane_id] = wm->wm[level].plane_res_b + extra; + alloc_size -= extra; + total_data_rate -= rate; - /* Allocate DDB for UV plane for planar format/NV12 */ - uv_data_rate = uv_plane_data_rate[plane_id]; + if (total_data_rate == 0) + break; - uv_plane_blocks = uv_minimum[plane_id]; - uv_plane_blocks += div64_u64(alloc_size * uv_data_rate, total_data_rate); + rate = uv_plane_data_rate[plane_id]; + extra = min_t(u16, alloc_size, + DIV64_U64_ROUND_UP(alloc_size * rate, + total_data_rate)); + uv_total[plane_id] = wm->uv_wm[level].plane_res_b + extra; + alloc_size -= extra; + total_data_rate -= rate; + } + WARN_ON(alloc_size != 0 || total_data_rate != 0); + + /* Set the actual DDB start/end points for each plane */ + start = alloc->start; + for_each_plane_id_on_crtc(intel_crtc, plane_id) { + struct skl_ddb_entry *plane_alloc, *uv_plane_alloc; + + if (plane_id == PLANE_CURSOR) + continue; + + plane_alloc = &cstate->wm.skl.plane_ddb_y[plane_id]; + uv_plane_alloc = &cstate->wm.skl.plane_ddb_uv[plane_id]; /* Gen11+ uses a separate plane for UV watermarks */ - WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_plane_blocks); + WARN_ON(INTEL_GEN(dev_priv) >= 11 && uv_total[plane_id]); + + /* Leave disabled planes at (0,0) */ + if (total[plane_id]) { + plane_alloc->start = start; + start += total[plane_id]; + plane_alloc->end = start; + } - if (uv_data_rate) { - cstate->wm.skl.plane_ddb_uv[plane_id].start = start; - cstate->wm.skl.plane_ddb_uv[plane_id].end = - start + uv_plane_blocks; + if (uv_total[plane_id]) { + uv_plane_alloc->start = start; + start += uv_total[plane_id]; + uv_plane_alloc->end = start; } + } - start += uv_plane_blocks; + /* + * When we calculated watermark values we didn't know how high + * of a level we'd actually be able to hit, so we just marked + * all levels as "enabled." Go back now and disable the ones + * that aren't actually possible. + */ + for (level++; level <= ilk_wm_max_level(dev_priv); level++) { + for_each_plane_id_on_crtc(intel_crtc, plane_id) { + wm = &cstate->wm.skl.optimal.planes[plane_id]; + memset(&wm->wm[level], 0, sizeof(wm->wm[level])); + } + } + + /* + * Go back and disable the transition watermark if it turns out we + * don't have enough DDB blocks for it. + */ + for_each_plane_id_on_crtc(intel_crtc, plane_id) { + wm = &cstate->wm.skl.optimal.planes[plane_id]; + if (wm->trans_wm.plane_res_b > total[plane_id]) + memset(&wm->trans_wm, 0, sizeof(wm->trans_wm)); } return 0; @@ -4715,17 +4674,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *cstate, return 0; } -static int skl_compute_plane_wm(const struct intel_crtc_state *cstate, - const struct intel_plane_state *intel_pstate, - uint16_t ddb_allocation, - int level, - const struct skl_wm_params *wp, - const struct skl_wm_level *result_prev, - struct skl_wm_level *result /* out */) +static void skl_compute_plane_wm(const struct intel_crtc_state *cstate, + const struct intel_plane_state *intel_pstate, + int level, + const struct skl_wm_params *wp, + const struct skl_wm_level *result_prev, + struct skl_wm_level *result /* out */) { struct drm_i915_private *dev_priv = to_i915(intel_pstate->base.plane->dev); - const struct drm_plane_state *pstate = &intel_pstate->base; uint32_t latency = dev_priv->wm.skl_latency[level]; uint_fixed_16_16_t method1, method2; uint_fixed_16_16_t selected_result; @@ -4733,10 +4690,6 @@ static int skl_compute_plane_wm(const struct intel_crtc_state *cstate, struct intel_atomic_state *state = to_intel_atomic_state(cstate->base.state); bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); - uint32_t min_disp_buf_needed; - - if (latency == 0) - return level == 0 ? -EINVAL : 0; /* Display WA #1141: kbl,cfl */ if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || @@ -4800,61 +4753,24 @@ static int skl_compute_plane_wm(const struct intel_crtc_state *cstate, res_blocks = result_prev->plane_res_b; } - if (INTEL_GEN(dev_priv) >= 11) { - if (wp->y_tiled) { - uint32_t extra_lines; - uint_fixed_16_16_t fp_min_disp_buf_needed; - - if (res_lines % wp->y_min_scanlines == 0) - extra_lines = wp->y_min_scanlines; - else - extra_lines = wp->y_min_scanlines * 2 - - res_lines % wp->y_min_scanlines; - - fp_min_disp_buf_needed = mul_u32_fixed16(res_lines + - extra_lines, - wp->plane_blocks_per_line); - min_disp_buf_needed = fixed16_to_u32_round_up( - fp_min_disp_buf_needed); - } else { - min_disp_buf_needed = DIV_ROUND_UP(res_blocks * 11, 10); - } - } else { - min_disp_buf_needed = res_blocks; - } - - if ((level > 0 && res_lines > 31) || - res_blocks >= ddb_allocation || - min_disp_buf_needed >= ddb_allocation) { - /* - * If there are no valid level 0 watermarks, then we can't - * support this display configuration. - */ - if (level) { - return 0; - } else { - struct drm_plane *plane = pstate->plane; - - DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); - DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n", - plane->base.id, plane->name, - res_blocks, ddb_allocation, res_lines); - return -EINVAL; - } - } - /* The number of lines are ignored for the level 0 watermark. */ + if (level > 0 && res_lines > 31) + return; + + /* + * If res_lines is valid, assume we can use this watermark level + * for now. We'll come back and disable it after we calculate the + * DDB allocation if it turns out we don't actually have enough + * blocks to satisfy it. + */ result->plane_res_b = res_blocks; result->plane_res_l = res_lines; result->plane_en = true; - - return 0; } -static int +static void skl_compute_wm_levels(const struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, - uint16_t ddb_blocks, const struct skl_wm_params *wm_params, struct skl_wm_level *levels) { @@ -4862,25 +4778,15 @@ skl_compute_wm_levels(const struct intel_crtc_state *cstate, to_i915(intel_pstate->base.plane->dev); int level, max_level = ilk_wm_max_level(dev_priv); struct skl_wm_level *result_prev = &levels[0]; - int ret; for (level = 0; level <= max_level; level++) { struct skl_wm_level *result = &levels[level]; - ret = skl_compute_plane_wm(cstate, - intel_pstate, - ddb_blocks, - level, - wm_params, - result_prev, - result); - if (ret) - return ret; + skl_compute_plane_wm(cstate, intel_pstate, level, wm_params, + result_prev, result); result_prev = result; } - - return 0; } static uint32_t @@ -4908,8 +4814,7 @@ skl_compute_linetime_wm(const struct intel_crtc_state *cstate) static void skl_compute_transition_wm(const struct intel_crtc_state *cstate, const struct skl_wm_params *wp, - struct skl_plane_wm *wm, - uint16_t ddb_allocation) + struct skl_plane_wm *wm) { struct drm_device *dev = cstate->base.crtc->dev; const struct drm_i915_private *dev_priv = to_i915(dev); @@ -4957,12 +4862,13 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *cstate, } - res_blocks += 1; - - if (res_blocks < ddb_allocation) { - wm->trans_wm.plane_res_b = res_blocks; - wm->trans_wm.plane_en = true; - } + /* + * Just assume we can enable the transition watermark. After + * computing the DDB we'll come back and disable it if that + * assumption turns out to be false. + */ + wm->trans_wm.plane_res_b = res_blocks + 1; + wm->trans_wm.plane_en = true; } static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, @@ -4970,7 +4876,6 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, enum plane_id plane_id, int color_plane) { struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; - u16 ddb_blocks = skl_ddb_entry_size(&crtc_state->wm.skl.plane_ddb_y[plane_id]); struct skl_wm_params wm_params; int ret; @@ -4979,12 +4884,8 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, if (ret) return ret; - ret = skl_compute_wm_levels(crtc_state, plane_state, - ddb_blocks, &wm_params, wm->wm); - if (ret) - return ret; - - skl_compute_transition_wm(crtc_state, &wm_params, wm, ddb_blocks); + skl_compute_wm_levels(crtc_state, plane_state, &wm_params, wm->wm); + skl_compute_transition_wm(crtc_state, &wm_params, wm); return 0; } @@ -4994,7 +4895,6 @@ static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state, enum plane_id plane_id) { struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; - u16 ddb_blocks = skl_ddb_entry_size(&crtc_state->wm.skl.plane_ddb_uv[plane_id]); struct skl_wm_params wm_params; int ret; @@ -5006,10 +4906,7 @@ static int skl_build_plane_wm_uv(struct intel_crtc_state *crtc_state, if (ret) return ret; - ret = skl_compute_wm_levels(crtc_state, plane_state, - ddb_blocks, &wm_params, wm->uv_wm); - if (ret) - return ret; + skl_compute_wm_levels(crtc_state, plane_state, &wm_params, wm->uv_wm); return 0; } @@ -5521,13 +5418,9 @@ skl_compute_wm(struct intel_atomic_state *state) if (ret || !changed) return ret; - ret = skl_compute_ddb(state); - if (ret) - return ret; - /* * Calculate WM's for all pipes that are part of this transaction. - * Note that the DDB allocation above may have added more CRTC's that + * Note that skl_ddb_add_affected_pipes may have added more CRTC's that * weren't otherwise being modified (and set bits in dirty_pipes) if * pipe allocations had to change. */ @@ -5549,6 +5442,10 @@ skl_compute_wm(struct intel_atomic_state *state) results->dirty_pipes |= drm_crtc_mask(&crtc->base); } + ret = skl_compute_ddb(state); + if (ret) + return ret; + skl_print_wm_changes(state); return 0; -- 2.30.2