715f9b1930c5a0e6889036fd1d5101b1c781940d
[openwrt/staging/noltari.git] /
1 From 7c0802428580f42e538dc8dcff8e5fbed80a5202 Mon Sep 17 00:00:00 2001
2 From: Dom Cobley <popcornmix@gmail.com>
3 Date: Mon, 14 Mar 2022 17:56:10 +0000
4 Subject: [PATCH] vc4/drm: vc4_plane: Keep fractional source coords
5 inside state
6
7 Signed-off-by: Dom Cobley <popcornmix@gmail.com>
8 ---
9 drivers/gpu/drm/vc4/vc4_drv.h | 2 +-
10 drivers/gpu/drm/vc4/vc4_plane.c | 67 ++++++++++++++++-----------------
11 2 files changed, 34 insertions(+), 35 deletions(-)
12
13 --- a/drivers/gpu/drm/vc4/vc4_drv.h
14 +++ b/drivers/gpu/drm/vc4/vc4_drv.h
15 @@ -386,7 +386,7 @@ struct vc4_plane_state {
16
17 /* Clipped coordinates of the plane on the display. */
18 int crtc_x, crtc_y, crtc_w, crtc_h;
19 - /* Clipped area being scanned from in the FB. */
20 + /* Clipped area being scanned from in the FB in u16.16 format */
21 u32 src_x, src_y;
22
23 u32 src_w[2], src_h[2];
24 --- a/drivers/gpu/drm/vc4/vc4_plane.c
25 +++ b/drivers/gpu/drm/vc4/vc4_plane.c
26 @@ -179,9 +179,9 @@ static const struct hvs_format *vc4_get_
27
28 static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
29 {
30 - if (dst == src)
31 + if (dst == src >> 16)
32 return VC4_SCALING_NONE;
33 - if (3 * dst >= 2 * src)
34 + if (3 * dst >= 2 * (src >> 16))
35 return VC4_SCALING_PPF;
36 else
37 return VC4_SCALING_TPZ;
38 @@ -388,14 +388,10 @@ static int vc4_plane_setup_clipping_and_
39 for (i = 0; i < num_planes; i++)
40 vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
41
42 - /* We don't support subpixel source positioning for scaling,
43 - * but fractional coordinates can be generated by clipping
44 - * so just round for now
45 - */
46 - vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1<<16);
47 - vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1<<16);
48 - vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1<<16) - vc4_state->src_x;
49 - vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1<<16) - vc4_state->src_y;
50 + vc4_state->src_x = state->src.x1;
51 + vc4_state->src_y = state->src.y1;
52 + vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
53 + vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;
54
55 vc4_state->crtc_x = state->dst.x1;
56 vc4_state->crtc_y = state->dst.y1;
57 @@ -448,7 +444,7 @@ static void vc4_write_tpz(struct vc4_pla
58 {
59 u32 scale, recip;
60
61 - scale = (1 << 16) * src / dst;
62 + scale = src / dst;
63
64 /* The specs note that while the reciprocal would be defined
65 * as (1<<32)/scale, ~0 is close enough.
66 @@ -494,7 +490,7 @@ static u32 vc4_lbm_size(struct drm_plane
67 if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
68 pix_per_line = vc4_state->crtc_w;
69 else
70 - pix_per_line = vc4_state->src_w[0];
71 + pix_per_line = vc4_state->src_w[0] >> 16;
72
73 if (!vc4_state->is_yuv) {
74 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
75 @@ -585,7 +581,8 @@ static void vc4_plane_calc_load(struct d
76 for (i = 0; i < fb->format->num_planes; i++) {
77 /* Even if the bandwidth/plane required for a single frame is
78 *
79 - * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
80 + * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
81 + * cpp * vrefresh
82 *
83 * when downscaling, we have to read more pixels per line in
84 * the time frame reserved for a single line, so the bandwidth
85 @@ -594,11 +591,11 @@ static void vc4_plane_calc_load(struct d
86 * load by this number. We're likely over-estimating the read
87 * demand, but that's better than under-estimating it.
88 */
89 - vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
90 + vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
91 vc4_state->crtc_h);
92 - vc4_state->membus_load += vc4_state->src_w[i] *
93 - vc4_state->src_h[i] * vscale_factor *
94 - fb->format->cpp[i];
95 + vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
96 + (vc4_state->src_h[i] >> 16) *
97 + vscale_factor * fb->format->cpp[i];
98 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
99 }
100
101 @@ -751,7 +748,8 @@ static int vc4_plane_mode_set(struct drm
102 bool mix_plane_alpha;
103 bool covers_screen;
104 u32 scl0, scl1, pitch0;
105 - u32 tiling, src_y;
106 + u32 tiling, src_x, src_y;
107 + u32 width, height;
108 u32 hvs_format = format->hvs;
109 unsigned int rotation;
110 int ret, i;
111 @@ -763,6 +761,9 @@ static int vc4_plane_mode_set(struct drm
112 if (ret)
113 return ret;
114
115 + width = vc4_state->src_w[0] >> 16;
116 + height = vc4_state->src_h[0] >> 16;
117 +
118 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
119 * and 4:4:4, scl1 should be set to scl0 so both channels of
120 * the scaler do the same thing. For YUV, the Y plane needs
121 @@ -783,9 +784,11 @@ static int vc4_plane_mode_set(struct drm
122 DRM_MODE_REFLECT_Y);
123
124 /* We must point to the last line when Y reflection is enabled. */
125 - src_y = vc4_state->src_y;
126 + src_y = vc4_state->src_y >> 16;
127 if (rotation & DRM_MODE_REFLECT_Y)
128 - src_y += vc4_state->src_h[0] - 1;
129 + src_y += height - 1;
130 +
131 + src_x = vc4_state->src_x >> 16;
132
133 switch (base_format_mod) {
134 case DRM_FORMAT_MOD_LINEAR:
135 @@ -800,7 +803,7 @@ static int vc4_plane_mode_set(struct drm
136 (i ? v_subsample : 1) *
137 fb->pitches[i];
138
139 - vc4_state->offsets[i] += vc4_state->src_x /
140 + vc4_state->offsets[i] += src_x /
141 (i ? h_subsample : 1) *
142 fb->format->cpp[i];
143 }
144 @@ -823,7 +826,7 @@ static int vc4_plane_mode_set(struct drm
145 * pitch * tile_h == tile_size * tiles_per_row
146 */
147 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
148 - u32 tiles_l = vc4_state->src_x >> tile_w_shift;
149 + u32 tiles_l = src_x >> tile_w_shift;
150 u32 tiles_r = tiles_w - tiles_l;
151 u32 tiles_t = src_y >> tile_h_shift;
152 /* Intra-tile offsets, which modify the base address (the
153 @@ -833,7 +836,7 @@ static int vc4_plane_mode_set(struct drm
154 u32 tile_y = (src_y >> 4) & 1;
155 u32 subtile_y = (src_y >> 2) & 3;
156 u32 utile_y = src_y & 3;
157 - u32 x_off = vc4_state->src_x & tile_w_mask;
158 + u32 x_off = src_x & tile_w_mask;
159 u32 y_off = src_y & tile_h_mask;
160
161 /* When Y reflection is requested we must set the
162 @@ -929,7 +932,7 @@ static int vc4_plane_mode_set(struct drm
163 * of the 12-pixels in that 128-bit word is the
164 * first pixel to be used
165 */
166 - u32 remaining_pixels = vc4_state->src_x % 96;
167 + u32 remaining_pixels = src_x % 96;
168 u32 aligned = remaining_pixels / 12;
169 u32 last_bits = remaining_pixels % 12;
170
171 @@ -951,12 +954,12 @@ static int vc4_plane_mode_set(struct drm
172 return -EINVAL;
173 }
174 pix_per_tile = tile_w / fb->format->cpp[0];
175 - x_off = (vc4_state->src_x % pix_per_tile) /
176 + x_off = (src_x % pix_per_tile) /
177 (i ? h_subsample : 1) *
178 fb->format->cpp[i];
179 }
180
181 - tile = vc4_state->src_x / pix_per_tile;
182 + tile = src_x / pix_per_tile;
183
184 vc4_state->offsets[i] += param * tile_w * tile;
185 vc4_state->offsets[i] += src_y /
186 @@ -1017,10 +1020,8 @@ static int vc4_plane_mode_set(struct drm
187 vc4_dlist_write(vc4_state,
188 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
189 vc4_hvs4_get_alpha_blend_mode(state) |
190 - VC4_SET_FIELD(vc4_state->src_w[0],
191 - SCALER_POS2_WIDTH) |
192 - VC4_SET_FIELD(vc4_state->src_h[0],
193 - SCALER_POS2_HEIGHT));
194 + VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
195 + VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));
196
197 /* Position Word 3: Context. Written by the HVS. */
198 vc4_dlist_write(vc4_state, 0xc0c0c0c0);
199 @@ -1078,10 +1079,8 @@ static int vc4_plane_mode_set(struct drm
200 /* Position Word 2: Source Image Size */
201 vc4_state->pos2_offset = vc4_state->dlist_count;
202 vc4_dlist_write(vc4_state,
203 - VC4_SET_FIELD(vc4_state->src_w[0],
204 - SCALER5_POS2_WIDTH) |
205 - VC4_SET_FIELD(vc4_state->src_h[0],
206 - SCALER5_POS2_HEIGHT));
207 + VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
208 + VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));
209
210 /* Position Word 3: Context. Written by the HVS. */
211 vc4_dlist_write(vc4_state, 0xc0c0c0c0);