drm/radeon: Avoid overflows/divide-by-zero in latency_watermark calculations.
authorMario Kleiner <mario.kleiner.de@gmail.com>
Sun, 23 Apr 2017 23:33:08 +0000 (01:33 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Fri, 28 Apr 2017 21:33:03 +0000 (17:33 -0400)
At dot clocks > approx. 250 Mhz, some of these calcs will overflow and
cause miscalculation of latency watermarks, and for some overflows also
divide-by-zero driver crash. Make calcs more overflow resistant.

This is a direct port of the corresponding patch from amdgpu-kms,
copy-paste for cik from dce-8 and si from dce-6, with a slightly
simpler variant for evergreen dce-4/5.

Only tested on DCE-4 evergreen with a Radeon HD-5770.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/si.c

index 53710dd7d5dd55a020c813bc5b4c020c3ba55d23..4f034cbc4a6a81ba415de46603a21b48be5a94a2 100644 (file)
@@ -9150,23 +9150,10 @@ static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
        a.full = dfixed_const(available_bandwidth);
        b.full = dfixed_const(wm->num_heads);
        a.full = dfixed_div(a, b);
+       tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+       tmp = min(dfixed_trunc(a), tmp);
 
-       b.full = dfixed_const(mc_latency + 512);
-       c.full = dfixed_const(wm->disp_clk);
-       b.full = dfixed_div(b, c);
-
-       c.full = dfixed_const(dmif_size);
-       b.full = dfixed_div(c, b);
-
-       tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-       b.full = dfixed_const(1000);
-       c.full = dfixed_const(wm->disp_clk);
-       b.full = dfixed_div(c, b);
-       c.full = dfixed_const(wm->bytes_per_pixel);
-       b.full = dfixed_mul(b, c);
-
-       lb_fill_bw = min(tmp, dfixed_trunc(b));
+       lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
        b.full = dfixed_const(1000);
index d1b1e0cc3c25b773f32ccc74c9401cf99b363c84..3c9c133bcccc0328931a2e79c57a2cdedd6eca5a 100644 (file)
@@ -2188,13 +2188,7 @@ static u32 evergreen_latency_watermark(struct evergreen_wm_params *wm)
        b.full = dfixed_const(wm->num_heads);
        a.full = dfixed_div(a, b);
 
-       b.full = dfixed_const(1000);
-       c.full = dfixed_const(wm->disp_clk);
-       b.full = dfixed_div(c, b);
-       c.full = dfixed_const(wm->bytes_per_pixel);
-       b.full = dfixed_mul(b, c);
-
-       lb_fill_bw = min(dfixed_trunc(a), dfixed_trunc(b));
+       lb_fill_bw = min(dfixed_trunc(a), wm->disp_clk * wm->bytes_per_pixel / 1000);
 
        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
        b.full = dfixed_const(1000);
index 528e5a49a214202c31c190af813213eb6f609912..3efdfd04069ac0ad749a92d524a63d37ff4fa770 100644 (file)
@@ -2204,23 +2204,10 @@ static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
        a.full = dfixed_const(available_bandwidth);
        b.full = dfixed_const(wm->num_heads);
        a.full = dfixed_div(a, b);
+       tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+       tmp = min(dfixed_trunc(a), tmp);
 
-       b.full = dfixed_const(mc_latency + 512);
-       c.full = dfixed_const(wm->disp_clk);
-       b.full = dfixed_div(b, c);
-
-       c.full = dfixed_const(dmif_size);
-       b.full = dfixed_div(c, b);
-
-       tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-       b.full = dfixed_const(1000);
-       c.full = dfixed_const(wm->disp_clk);
-       b.full = dfixed_div(c, b);
-       c.full = dfixed_const(wm->bytes_per_pixel);
-       b.full = dfixed_mul(b, c);
-
-       lb_fill_bw = min(tmp, dfixed_trunc(b));
+       lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
        b.full = dfixed_const(1000);