1 From 4b33f988d9da9776bcfe218df4ab9865f4b771a8 Mon Sep 17 00:00:00 2001
2 From: John Cox <jc@kynesim.co.uk>
3 Date: Thu, 21 May 2020 11:49:37 +0100
4 Subject: [PATCH] media: rpivid: Remove the need to have num_entry_points set
7 VAAPI H265 has num_entry_points but never sets it. Allow a VAAPI
8 shim to work without requiring the VAAPI driver to be rewritten.
9 num_entry_points can be calculated from the slice_segment_addr
10 of the next slice, so delay processing until we have that.
12 Also includes some minor cosmetics.
14 Signed-off-by: John Cox <jc@kynesim.co.uk>
16 drivers/staging/media/rpivid/rpivid_h265.c | 699 +++++++++++----------
17 1 file changed, 365 insertions(+), 334 deletions(-)
19 --- a/drivers/staging/media/rpivid/rpivid_h265.c
20 +++ b/drivers/staging/media/rpivid/rpivid_h265.c
21 @@ -202,8 +202,17 @@ struct rpivid_dec_env {
22 unsigned int dpbno_col;
24 int collocated_from_l0_flag;
25 - unsigned int wpp_entry_x;
26 - unsigned int wpp_entry_y;
28 + * Last CTB/Tile X,Y processed by new_entry_point.
29 + * Could be in _state as P0 only, but needs updating where _state is const
32 + unsigned int entry_ctb_x;
33 + unsigned int entry_ctb_y;
34 + unsigned int entry_tile_x;
35 + unsigned int entry_tile_y;
36 + unsigned int entry_qp;
41 @@ -239,22 +248,17 @@ struct rpivid_dec_state {
42 struct v4l2_ctrl_hevc_pps pps;
44 // Helper vars & tables derived from sps/pps
45 - unsigned int log2_ctb_size; /* log2 width of a CTB */
46 - unsigned int ctb_width; /* Width in CTBs */
47 - unsigned int ctb_height; /* Height in CTBs */
48 - unsigned int ctb_size; /* Pic area in CTBs */
49 - unsigned int num_tile_columns;
50 - unsigned int num_tile_rows;
51 - u8 column_width[member_size(struct v4l2_ctrl_hevc_pps,
52 - column_width_minus1)];
53 - u8 row_height[member_size(struct v4l2_ctrl_hevc_pps,
54 - row_height_minus1)];
55 + unsigned int log2_ctb_size; /* log2 width of a CTB */
56 + unsigned int ctb_width; /* Width in CTBs */
57 + unsigned int ctb_height; /* Height in CTBs */
58 + unsigned int ctb_size; /* Pic area in CTBs */
59 + unsigned int tile_width; /* Width in tiles */
60 + unsigned int tile_height; /* Height in tiles */
64 int *ctb_addr_rs_to_ts;
65 int *ctb_addr_ts_to_rs;
68 // Aux starage for DPB
70 @@ -274,6 +278,12 @@ struct rpivid_dec_state {
71 unsigned int slice_qp;
72 unsigned int max_num_merge_cand; // 0 if I-slice
73 bool dependent_slice_segment_flag;
75 + unsigned int start_ts; /* slice_segment_addr -> ts */
76 + unsigned int start_ctb_x; /* CTB X,Y of start_ts */
77 + unsigned int start_ctb_y;
78 + unsigned int prev_ctb_x; /* CTB X,Y of start_ts - 1 */
79 + unsigned int prev_ctb_y;
82 static inline int clip_int(const int x, const int lo, const int hi)
83 @@ -319,15 +329,16 @@ static int ctb_to_tile(unsigned int ctb,
87 -static int ctb_to_slice_w_h(unsigned int ctb, int ctb_size, int width,
88 - unsigned int *bd, int num)
89 +static unsigned int ctb_to_tile_x(const struct rpivid_dec_state *const s,
90 + const unsigned int ctb_x)
92 - if (ctb < bd[num - 1])
94 - else if (width % ctb_size)
95 - return width % ctb_size;
98 + return ctb_to_tile(ctb_x, s->col_bd, s->tile_width);
101 +static unsigned int ctb_to_tile_y(const struct rpivid_dec_state *const s,
102 + const unsigned int ctb_y)
104 + return ctb_to_tile(ctb_y, s->row_bd, s->tile_height);
107 static void aux_q_free(struct rpivid_ctx *const ctx,
108 @@ -532,6 +543,15 @@ static void write_prob(struct rpivid_dec
109 p1_apb_write(de, 0x1000 + i,
110 dst[i] + (dst[i + 1] << 8) + (dst[i + 2] << 16) +
114 + * Having written the prob array, back it up.
115 + * This is not always needed, but it is a small overhead that simplifies
116 + * (and speeds up) some multi-tile & WPP scenarios.
117 + * There are no scenarios where, having written a prob, we ever want
118 + * a previous (non-initial) state back.
120 + p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
123 static void write_scaling_factors(struct rpivid_dec_env *const de)
124 @@ -552,8 +572,8 @@ static inline __u32 dma_to_axi_addr(dma_
125 static void write_bitstream(struct rpivid_dec_env *const de,
126 const struct rpivid_dec_state *const s)
128 - // Note that FFmpeg removes emulation prevention bytes, so this is
129 - // matched in the configuration here.
130 + // Note that FFmpeg V4L2 does not remove emulation prevention bytes,
131 + // so this is matched in the configuration here.
132 // Whether that is the correct behaviour or not is not clear in the
134 const int rpi_use_emu = 1;
135 @@ -579,78 +599,26 @@ static void write_bitstream(struct rpivi
137 //////////////////////////////////////////////////////////////////////////////
139 -static void write_slice(struct rpivid_dec_env *const de,
140 - const struct rpivid_dec_state *const s,
141 - const unsigned int slice_w,
142 - const unsigned int slice_h)
144 - u32 u32 = (s->sh->slice_type << 12) +
146 - V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA) != 0)
149 - V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA) != 0)
151 - (slice_w << 17) + (slice_h << 24);
153 - u32 |= (s->max_num_merge_cand << 0) + (s->nb_refs[L0] << 4) +
154 - (s->nb_refs[L1] << 8);
156 - if (s->sh->slice_type == HEVC_SLICE_B)
157 - u32 |= ((s->sh->flags &
158 - V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO) != 0)
160 - p1_apb_write(de, RPI_SLICE, u32);
163 -//////////////////////////////////////////////////////////////////////////////
166 -static void new_entry_point(struct rpivid_dec_env *const de,
167 - const struct rpivid_dec_state *const s,
169 - const int reset_qp_y, const int ctb_addr_ts)
171 + * The slice constant part of the slice register - width and height need to
172 + * be ORed in later as they are per-tile / WPP-row
174 +static u32 slice_reg_const(const struct rpivid_dec_state *const s)
176 - int ctb_col = s->ctb_addr_ts_to_rs[ctb_addr_ts] %
177 - de->pic_width_in_ctbs_y;
178 - int ctb_row = s->ctb_addr_ts_to_rs[ctb_addr_ts] /
179 - de->pic_width_in_ctbs_y;
181 - int tile_x = ctb_to_tile(ctb_col, s->col_bd, s->num_tile_columns);
182 - int tile_y = ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows);
184 - int endx = s->col_bd[tile_x + 1] - 1;
185 - int endy = s->row_bd[tile_y + 1] - 1;
187 - u8 slice_w = ctb_to_slice_w_h(ctb_col, 1 << s->log2_ctb_size,
188 - s->sps.pic_width_in_luma_samples,
189 - s->col_bd, s->num_tile_columns);
190 - u8 slice_h = ctb_to_slice_w_h(ctb_row, 1 << s->log2_ctb_size,
191 - s->sps.pic_height_in_luma_samples,
192 - s->row_bd, s->num_tile_rows);
194 - p1_apb_write(de, RPI_TILESTART,
195 - s->col_bd[tile_x] + (s->row_bd[tile_y] << 16));
196 - p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
199 - p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
200 + u32 x = (s->max_num_merge_cand << 0) |
201 + (s->nb_refs[L0] << 4) |
202 + (s->nb_refs[L1] << 8) |
203 + (s->sh->slice_type << 12);
205 + if (s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA)
207 + if (s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA)
209 + if (s->sh->slice_type == HEVC_SLICE_B &&
210 + (s->sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO))
213 - write_slice(de, s, slice_w, slice_h);
216 - unsigned int sps_qp_bd_offset =
217 - 6 * s->sps.bit_depth_luma_minus8;
219 - p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp);
222 - p1_apb_write(de, RPI_MODE,
223 - (0xFFFF << 0) + (0x0 << 16) +
224 - ((tile_x == s->num_tile_columns - 1) << 17) +
225 - ((tile_y == s->num_tile_rows - 1) << 18));
227 - p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16));
231 //////////////////////////////////////////////////////////////////////////////
232 @@ -934,197 +902,256 @@ static void pre_slice_decode(struct rpiv
233 (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF
236 -//////////////////////////////////////////////////////////////////////////////
237 -// Write STATUS register with expected end CTU address of previous slice
239 -static void end_previous_slice(struct rpivid_dec_env *const de,
240 - const struct rpivid_dec_state *const s,
241 - const int ctb_addr_ts)
244 - s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] % de->pic_width_in_ctbs_y;
246 - s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] / de->pic_width_in_ctbs_y;
248 - p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
251 -static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row)
253 - p1_apb_write(de, RPI_STATUS, (ctb_row << 18) + 0x25);
254 - p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
255 - p1_apb_write(de, RPI_MODE,
256 - ctb_row == de->pic_height_in_ctbs_y - 1 ?
257 - 0x70000 : 0x30000);
258 - p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2);
261 -static void wpp_end_previous_slice(struct rpivid_dec_env *const de,
262 - const struct rpivid_dec_state *const s,
265 - int new_x = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
266 - int new_y = s->sh->slice_segment_addr / de->pic_width_in_ctbs_y;
268 - s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] % de->pic_width_in_ctbs_y;
270 - s->ctb_addr_ts_to_rs[ctb_addr_ts - 1] / de->pic_width_in_ctbs_y;
272 - if (de->wpp_entry_x < 2 && (de->wpp_entry_y < new_y || new_x > 2) &&
273 - de->pic_width_in_ctbs_y > 2)
274 - wpp_pause(de, last_y);
275 - p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
276 - if (new_x == 2 || (de->pic_width_in_ctbs_y == 2 &&
277 - de->wpp_entry_y < new_y))
278 - p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
279 +static void write_slice(struct rpivid_dec_env *const de,
280 + const struct rpivid_dec_state *const s,
281 + const u32 slice_const,
282 + const unsigned int ctb_col,
283 + const unsigned int ctb_row)
285 + const unsigned int cs = (1 << s->log2_ctb_size);
286 + const unsigned int w_last = s->sps.pic_width_in_luma_samples & (cs - 1);
287 + const unsigned int h_last = s->sps.pic_height_in_luma_samples & (cs - 1);
289 + p1_apb_write(de, RPI_SLICE,
291 + ((ctb_col + 1 < s->ctb_width || !w_last ?
292 + cs : w_last) << 17) |
293 + ((ctb_row + 1 < s->ctb_height || !h_last ?
294 + cs : h_last) << 24));
297 -//////////////////////////////////////////////////////////////////////////////
299 +#define PAUSE_MODE_WPP 1
300 +#define PAUSE_MODE_TILE 0xffff
302 -static void wpp_entry_point(struct rpivid_dec_env *const de,
304 + * N.B. This can be called to fill in data from the previous slice so must not
305 + * use any state data that may change from slice to slice (e.g. qp)
307 +static void new_entry_point(struct rpivid_dec_env *const de,
308 const struct rpivid_dec_state *const s,
310 - const int reset_qp_y, const int ctb_addr_ts)
312 - int ctb_size = 1 << s->log2_ctb_size;
313 - int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
315 - int ctb_col = de->wpp_entry_x = ctb_addr_rs % de->pic_width_in_ctbs_y;
316 - int ctb_row = de->wpp_entry_y = ctb_addr_rs / de->pic_width_in_ctbs_y;
318 + const bool reset_qp_y,
319 + const u32 pause_mode,
320 + const unsigned int tile_x,
321 + const unsigned int tile_y,
322 + const unsigned int ctb_col,
323 + const unsigned int ctb_row,
324 + const unsigned int slice_qp,
325 + const u32 slice_const)
327 + const unsigned int endx = s->col_bd[tile_x + 1] - 1;
328 + const unsigned int endy = (pause_mode == PAUSE_MODE_WPP) ?
329 + ctb_row : s->row_bd[tile_y + 1] - 1;
331 - int endx = de->pic_width_in_ctbs_y - 1;
332 - int endy = ctb_row;
334 - u8 slice_w = ctb_to_slice_w_h(ctb_col, ctb_size,
335 - s->sps.pic_width_in_luma_samples,
336 - s->col_bd, s->num_tile_columns);
337 - u8 slice_h = ctb_to_slice_w_h(ctb_row, ctb_size,
338 - s->sps.pic_height_in_luma_samples,
339 - s->row_bd, s->num_tile_rows);
341 - p1_apb_write(de, RPI_TILESTART, 0);
342 - p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
343 + p1_apb_write(de, RPI_TILESTART,
344 + s->col_bd[tile_x] | (s->row_bd[tile_y] << 16));
345 + p1_apb_write(de, RPI_TILEEND, endx | (endy << 16));
348 - p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
349 + p1_apb_write(de, RPI_BEGINTILEEND, endx | (endy << 16));
351 - write_slice(de, s, slice_w,
352 - ctb_row == de->pic_height_in_ctbs_y - 1 ?
353 - slice_h : ctb_size);
354 + write_slice(de, s, slice_const, endx, endy);
357 unsigned int sps_qp_bd_offset =
358 6 * s->sps.bit_depth_luma_minus8;
360 - p1_apb_write(de, RPI_QP, sps_qp_bd_offset + s->slice_qp);
361 + p1_apb_write(de, RPI_QP, sps_qp_bd_offset + slice_qp);
364 p1_apb_write(de, RPI_MODE,
365 - ctb_row == de->pic_height_in_ctbs_y - 1 ?
366 - 0x60001 : 0x20001);
367 - p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) + (ctb_row << 16));
369 + ((endx == s->ctb_width - 1) << 17) |
370 + ((endy == s->ctb_height - 1) << 18));
372 + p1_apb_write(de, RPI_CONTROL, (ctb_col << 0) | (ctb_row << 16));
374 + de->entry_tile_x = tile_x;
375 + de->entry_tile_y = tile_y;
376 + de->entry_ctb_x = ctb_col;
377 + de->entry_ctb_y = ctb_row;
378 + de->entry_qp = slice_qp;
379 + de->entry_slice = slice_const;
382 //////////////////////////////////////////////////////////////////////////////
385 +static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row)
387 + p1_apb_write(de, RPI_STATUS, (ctb_row << 18) | 0x25);
388 + p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
389 + p1_apb_write(de, RPI_MODE,
390 + ctb_row == de->pic_height_in_ctbs_y - 1 ?
391 + 0x70000 : 0x30000);
392 + p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2);
395 +static void wpp_entry_fill(struct rpivid_dec_env *const de,
396 + const struct rpivid_dec_state *const s,
397 + const unsigned int last_y)
399 + const unsigned int last_x = s->ctb_width - 1;
401 + while (de->entry_ctb_y < last_y) {
402 + /* entry_ctb_x/y set by new_entry_point */
403 + if (s->ctb_width > 2)
404 + wpp_pause(de, de->entry_ctb_y);
405 + p1_apb_write(de, RPI_STATUS,
406 + (de->entry_ctb_y << 18) | (last_x << 5) | 2);
408 + /* if width == 1 then the saved state is the init one */
409 + if (s->ctb_width == 2)
410 + p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
412 + p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
414 + new_entry_point(de, s, false, true, PAUSE_MODE_WPP,
415 + 0, 0, 0, de->entry_ctb_y + 1,
416 + de->entry_qp, de->entry_slice);
420 +static void wpp_end_previous_slice(struct rpivid_dec_env *const de,
421 + const struct rpivid_dec_state *const s)
423 + wpp_entry_fill(de, s, s->prev_ctb_y);
425 + if (de->entry_ctb_x < 2 &&
426 + (de->entry_ctb_y < s->start_ctb_y || s->start_ctb_x > 2) &&
428 + wpp_pause(de, s->prev_ctb_y);
429 + p1_apb_write(de, RPI_STATUS,
430 + 1 | (s->prev_ctb_x << 5) | (s->prev_ctb_y << 18));
431 + if (s->start_ctb_x == 2 ||
432 + (s->ctb_width == 2 && de->entry_ctb_y < s->start_ctb_y))
433 + p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
436 +/* Only main profile supported so WPP => !Tiles which makes some of the
437 + * next chunk code simpler
439 static void wpp_decode_slice(struct rpivid_dec_env *const de,
440 - const struct rpivid_dec_state *const s,
441 - const struct v4l2_ctrl_hevc_slice_params *sh,
444 - int i, reset_qp_y = 1;
445 - int indep = !s->dependent_slice_segment_flag;
446 - int ctb_col = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
447 + const struct rpivid_dec_state *const s)
449 + bool reset_qp_y = true;
450 + const bool indep = !s->dependent_slice_segment_flag;
453 - wpp_end_previous_slice(de, s, ctb_addr_ts);
455 + wpp_end_previous_slice(de, s);
456 pre_slice_decode(de, s);
457 write_bitstream(de, s);
458 - if (ctb_addr_ts == 0 || indep || de->pic_width_in_ctbs_y == 1)
460 + if (!s->start_ts || indep || s->ctb_width == 1)
462 - else if (ctb_col == 0)
463 + else if (!s->start_ctb_x)
464 p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
467 + reset_qp_y = false;
469 program_slicecmds(de, s->slice_idx);
470 new_slice_segment(de, s);
471 - wpp_entry_point(de, s, indep, reset_qp_y, ctb_addr_ts);
472 + new_entry_point(de, s, indep, reset_qp_y, PAUSE_MODE_WPP,
473 + 0, 0, s->start_ctb_x, s->start_ctb_y,
474 + s->slice_qp, slice_reg_const(s));
476 - for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
477 - int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
478 - int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
479 - int last_x = de->pic_width_in_ctbs_y - 1;
480 + if (s->frame_end) {
481 + wpp_entry_fill(de, s, s->ctb_height - 1);
483 + if (de->entry_ctb_x < 2 && s->ctb_width > 2)
484 + wpp_pause(de, s->ctb_height - 1);
486 - if (de->pic_width_in_ctbs_y > 2)
487 - wpp_pause(de, ctb_row);
488 p1_apb_write(de, RPI_STATUS,
489 - (ctb_row << 18) + (last_x << 5) + 2);
490 - if (de->pic_width_in_ctbs_y == 2)
491 - p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
492 - if (de->pic_width_in_ctbs_y == 1)
495 - p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
496 - ctb_addr_ts += s->column_width[0];
497 - wpp_entry_point(de, s, 0, 1, ctb_addr_ts);
498 + 1 | ((s->ctb_width - 1) << 5) |
499 + ((s->ctb_height - 1) << 18));
504 //////////////////////////////////////////////////////////////////////////////
507 +static void tile_entry_fill(struct rpivid_dec_env *const de,
508 + const struct rpivid_dec_state *const s,
509 + const unsigned int last_tile_x,
510 + const unsigned int last_tile_y)
512 + while (de->entry_tile_y < last_tile_y ||
513 + (de->entry_tile_y == last_tile_y &&
514 + de->entry_tile_x < last_tile_x)) {
515 + unsigned int t_x = de->entry_tile_x;
516 + unsigned int t_y = de->entry_tile_y;
517 + const unsigned int last_x = s->col_bd[t_x + 1] - 1;
518 + const unsigned int last_y = s->row_bd[t_y + 1] - 1;
520 + p1_apb_write(de, RPI_STATUS,
521 + 2 | (last_x << 5) | (last_y << 18));
522 + p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
525 + if (++t_x >= s->tile_width) {
530 + new_entry_point(de, s, false, true, PAUSE_MODE_TILE,
531 + t_x, t_y, s->col_bd[t_x], s->row_bd[t_y],
532 + de->entry_qp, de->entry_slice);
537 + * Write STATUS register with expected end CTU address of previous slice
539 +static void end_previous_slice(struct rpivid_dec_env *const de,
540 + const struct rpivid_dec_state *const s)
542 + tile_entry_fill(de, s,
543 + ctb_to_tile_x(s, s->prev_ctb_x),
544 + ctb_to_tile_y(s, s->prev_ctb_y));
545 + p1_apb_write(de, RPI_STATUS,
546 + 1 | (s->prev_ctb_x << 5) | (s->prev_ctb_y << 18));
549 static void decode_slice(struct rpivid_dec_env *const de,
550 - const struct rpivid_dec_state *const s,
551 - const struct v4l2_ctrl_hevc_slice_params *const sh,
553 + const struct rpivid_dec_state *const s)
557 + unsigned int tile_x = ctb_to_tile_x(s, s->start_ctb_x);
558 + unsigned int tile_y = ctb_to_tile_y(s, s->start_ctb_y);
561 - end_previous_slice(de, s, ctb_addr_ts);
563 + end_previous_slice(de, s);
565 pre_slice_decode(de, s);
566 write_bitstream(de, s);
568 -#if DEBUG_TRACE_P1_CMD
570 - v4l2_info(&de->ctx->dev->v4l2_dev,
571 - "TS=%d, tile=%d/%d, dss=%d, flags=%#llx\n",
572 - ctb_addr_ts, s->tile_id[ctb_addr_ts],
573 - s->tile_id[ctb_addr_ts - 1],
574 - s->dependent_slice_segment_flag, sh->flags);
578 - reset_qp_y = ctb_addr_ts == 0 ||
579 - s->tile_id[ctb_addr_ts] != s->tile_id[ctb_addr_ts - 1] ||
580 - !s->dependent_slice_segment_flag;
581 + reset_qp_y = !s->start_ts ||
582 + !s->dependent_slice_segment_flag ||
583 + tile_x != ctb_to_tile_x(s, s->prev_ctb_x) ||
584 + tile_y != ctb_to_tile_y(s, s->prev_ctb_y);
588 program_slicecmds(de, s->slice_idx);
589 new_slice_segment(de, s);
590 new_entry_point(de, s, !s->dependent_slice_segment_flag, reset_qp_y,
593 - for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
594 - int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
595 - int ctb_col = ctb_addr_rs % de->pic_width_in_ctbs_y;
596 - int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
597 - int tile_x = ctb_to_tile(ctb_col, s->col_bd,
598 - s->num_tile_columns - 1);
600 - ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows - 1);
601 - int last_x = s->col_bd[tile_x + 1] - 1;
602 - int last_y = s->row_bd[tile_y + 1] - 1;
604 + tile_x, tile_y, s->start_ctb_x, s->start_ctb_y,
605 + s->slice_qp, slice_reg_const(s));
608 + * If this is the last slice then fill in the other tile entries
609 + * now, otherwise this will be done at the start of the next slice
610 + * when it will be known where this slice finishes
612 + if (s->frame_end) {
613 + tile_entry_fill(de, s,
615 + s->tile_height - 1);
616 p1_apb_write(de, RPI_STATUS,
617 - 2 + (last_x << 5) + (last_y << 18));
619 - ctb_addr_ts += s->column_width[tile_x] * s->row_height[tile_y];
620 - new_entry_point(de, s, 0, 1, ctb_addr_ts);
621 + 1 | ((s->ctb_width - 1) << 5) |
622 + ((s->ctb_height - 1) << 18));
626 @@ -1132,13 +1159,12 @@ static void decode_slice(struct rpivid_d
629 static void expand_scaling_list(const unsigned int size_id,
630 - const unsigned int matrix_id, u8 *const dst0,
632 const u8 *const src0, uint8_t dc)
637 - // FIXME: matrix_id is unused ?
640 memcpy(dst0, src0, 16);
641 @@ -1199,24 +1225,20 @@ static void populate_scaling_factors(con
644 for (mid = 0; mid < 6; mid++)
645 - expand_scaling_list(0, mid,
646 - de->scaling_factors +
647 + expand_scaling_list(0, de->scaling_factors +
648 scaling_factor_offsets[0][mid],
649 sl->scaling_list_4x4[mid], 0);
650 for (mid = 0; mid < 6; mid++)
651 - expand_scaling_list(1, mid,
652 - de->scaling_factors +
653 + expand_scaling_list(1, de->scaling_factors +
654 scaling_factor_offsets[1][mid],
655 sl->scaling_list_8x8[mid], 0);
656 for (mid = 0; mid < 6; mid++)
657 - expand_scaling_list(2, mid,
658 - de->scaling_factors +
659 + expand_scaling_list(2, de->scaling_factors +
660 scaling_factor_offsets[2][mid],
661 sl->scaling_list_16x16[mid],
662 sl->scaling_list_dc_coef_16x16[mid]);
663 - for (mid = 0; mid < 2; mid += 1)
664 - expand_scaling_list(3, mid,
665 - de->scaling_factors +
666 + for (mid = 0; mid < 2; mid++)
667 + expand_scaling_list(3, de->scaling_factors +
668 scaling_factor_offsets[3][mid],
669 sl->scaling_list_32x32[mid],
670 sl->scaling_list_dc_coef_32x32[mid]);
671 @@ -1228,8 +1250,6 @@ static void free_ps_info(struct rpivid_d
672 s->ctb_addr_rs_to_ts = NULL;
673 kfree(s->ctb_addr_ts_to_rs);
674 s->ctb_addr_ts_to_rs = NULL;
680 @@ -1237,10 +1257,52 @@ static void free_ps_info(struct rpivid_d
684 +static unsigned int tile_width(const struct rpivid_dec_state *const s,
685 + const unsigned int t_x)
687 + return s->col_bd[t_x + 1] - s->col_bd[t_x];
690 +static unsigned int tile_height(const struct rpivid_dec_state *const s,
691 + const unsigned int t_y)
693 + return s->row_bd[t_y + 1] - s->row_bd[t_y];
696 +static void fill_rs_to_ts(struct rpivid_dec_state *const s)
698 + unsigned int ts = 0;
700 + unsigned int tr_rs = 0;
702 + for (t_y = 0; t_y != s->tile_height; ++t_y) {
703 + const unsigned int t_h = tile_height(s, t_y);
705 + unsigned int tc_rs = tr_rs;
707 + for (t_x = 0; t_x != s->tile_width; ++t_x) {
708 + const unsigned int t_w = tile_width(s, t_x);
710 + unsigned int rs = tc_rs;
712 + for (y = 0; y != t_h; ++y) {
715 + for (x = 0; x != t_w; ++x) {
716 + s->ctb_addr_rs_to_ts[rs + x] = ts;
717 + s->ctb_addr_ts_to_rs[ts] = rs + x;
720 + rs += s->ctb_width;
724 + tr_rs += t_h * s->ctb_width;
728 static int updated_ps(struct rpivid_dec_state *const s)
730 - unsigned int ctb_addr_rs;
731 - int j, x, y, tile_id;
735 @@ -1259,104 +1321,49 @@ static int updated_ps(struct rpivid_dec_
737 // Inferred parameters
739 + s->ctb_addr_rs_to_ts = kmalloc_array(s->ctb_size,
740 + sizeof(*s->ctb_addr_rs_to_ts),
742 + s->ctb_addr_ts_to_rs = kmalloc_array(s->ctb_size,
743 + sizeof(*s->ctb_addr_ts_to_rs),
746 if (!(s->pps.flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
747 - s->num_tile_columns = 1;
748 - s->num_tile_rows = 1;
749 - s->column_width[0] = s->ctb_width;
750 - s->row_height[0] = s->ctb_height;
752 + s->tile_height = 1;
754 - s->num_tile_columns = s->pps.num_tile_columns_minus1 + 1;
755 - s->num_tile_rows = s->pps.num_tile_rows_minus1 + 1;
756 - for (i = 0; i < s->num_tile_columns; ++i)
757 - s->column_width[i] = s->pps.column_width_minus1[i] + 1;
758 - for (i = 0; i < s->num_tile_rows; ++i)
759 - s->row_height[i] = s->pps.row_height_minus1[i] + 1;
760 + s->tile_width = s->pps.num_tile_columns_minus1 + 1;
761 + s->tile_height = s->pps.num_tile_rows_minus1 + 1;
764 - s->col_bd = kmalloc((s->num_tile_columns + 1) * sizeof(*s->col_bd),
765 + s->col_bd = kmalloc((s->tile_width + 1) * sizeof(*s->col_bd),
767 - s->row_bd = kmalloc((s->num_tile_rows + 1) * sizeof(*s->row_bd),
768 + s->row_bd = kmalloc((s->tile_height + 1) * sizeof(*s->row_bd),
772 - for (i = 0; i < s->num_tile_columns; i++)
773 - s->col_bd[i + 1] = s->col_bd[i] + s->column_width[i];
774 + for (i = 1; i < s->tile_width; i++)
775 + s->col_bd[i] = s->col_bd[i - 1] +
776 + s->pps.column_width_minus1[i - 1] + 1;
777 + s->col_bd[s->tile_width] = s->ctb_width;
780 - for (i = 0; i < s->num_tile_rows; i++)
781 - s->row_bd[i + 1] = s->row_bd[i] + s->row_height[i];
782 + for (i = 1; i < s->tile_height; i++)
783 + s->row_bd[i] = s->row_bd[i - 1] +
784 + s->pps.row_height_minus1[i - 1] + 1;
785 + s->row_bd[s->tile_height] = s->ctb_height;
787 - s->ctb_addr_rs_to_ts = kmalloc_array(s->ctb_size,
788 - sizeof(*s->ctb_addr_rs_to_ts),
790 - s->ctb_addr_ts_to_rs = kmalloc_array(s->ctb_size,
791 - sizeof(*s->ctb_addr_ts_to_rs),
793 - s->tile_id = kmalloc_array(s->ctb_size, sizeof(*s->tile_id),
796 - for (ctb_addr_rs = 0; ctb_addr_rs < s->ctb_size; ctb_addr_rs++) {
797 - int tb_x = ctb_addr_rs % s->ctb_width;
798 - int tb_y = ctb_addr_rs / s->ctb_width;
803 - for (i = 0; i < s->num_tile_columns; i++) {
804 - if (tb_x < s->col_bd[i + 1]) {
810 - for (i = 0; i < s->num_tile_rows; i++) {
811 - if (tb_y < s->row_bd[i + 1]) {
817 - for (i = 0; i < tile_x; i++)
818 - val += s->row_height[tile_y] * s->column_width[i];
819 - for (i = 0; i < tile_y; i++)
820 - val += s->ctb_width * s->row_height[i];
822 - val += (tb_y - s->row_bd[tile_y]) * s->column_width[tile_x] +
823 - tb_x - s->col_bd[tile_x];
825 - s->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
826 - s->ctb_addr_ts_to_rs[val] = ctb_addr_rs;
829 - for (j = 0, tile_id = 0; j < s->num_tile_rows; j++)
830 - for (i = 0; i < s->num_tile_columns; i++, tile_id++)
831 - for (y = s->row_bd[j]; y < s->row_bd[j + 1]; y++)
832 - for (x = s->col_bd[i];
833 - x < s->col_bd[i + 1];
835 - s->tile_id[s->ctb_addr_rs_to_ts
836 - [y * s->ctb_width +
843 -static int frame_end(struct rpivid_dev *const dev,
844 - struct rpivid_dec_env *const de,
845 - const struct rpivid_dec_state *const s)
847 - const unsigned int last_x = s->col_bd[s->num_tile_columns] - 1;
848 - const unsigned int last_y = s->row_bd[s->num_tile_rows] - 1;
851 - if (s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) {
852 - if (de->wpp_entry_x < 2 && de->pic_width_in_ctbs_y > 2)
853 - wpp_pause(de, last_y);
855 - p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
857 +static int write_cmd_buffer(struct rpivid_dev *const dev,
858 + struct rpivid_dec_env *const de,
859 + const struct rpivid_dec_state *const s)
861 // Copy commands out to dma buf
862 - cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]);
863 + const size_t cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]);
865 if (!de->cmd_copy_gptr->ptr || cmd_size > de->cmd_copy_gptr->size) {
866 size_t cmd_alloc = round_up_size(cmd_size);
867 @@ -1521,18 +1528,19 @@ static void rpivid_h265_setup(struct rpi
868 struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
869 struct rpivid_dec_state *const s = ctx->state;
870 struct vb2_queue *vq;
871 - struct rpivid_dec_env *de;
873 + struct rpivid_dec_env *de = ctx->dec0;
874 + unsigned int prev_rs;
877 bool slice_temporal_mvp;
879 + xtrace_in(dev, de);
881 pred_weight_table = &sh->pred_weight_table;
884 ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
887 slice_temporal_mvp = (sh->flags &
888 V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);
890 @@ -1662,6 +1670,13 @@ static void rpivid_h265_setup(struct rpi
891 s->sps.pic_height_in_luma_samples);
894 + if ((s->tile_width != 1 || s->tile_height != 1) &&
896 + V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED)) {
897 + v4l2_warn(&dev->v4l2_dev,
898 + "Tiles + WPP not supported\n");
902 // Fill in ref planes with our address s.t. if we mess
903 // up refs somehow then we still have a valid address
904 @@ -1760,15 +1775,24 @@ static void rpivid_h265_setup(struct rpi
905 if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
906 populate_scaling_factors(run, de, s);
908 - ctb_addr_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
909 + // Calc all the random coord info to avoid repeated conversion in/out
910 + s->start_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
911 + s->start_ctb_x = sh->slice_segment_addr % de->pic_width_in_ctbs_y;
912 + s->start_ctb_y = sh->slice_segment_addr / de->pic_width_in_ctbs_y;
913 + // Last CTB of previous slice
914 + prev_rs = !s->start_ts ? 0 : s->ctb_addr_ts_to_rs[s->start_ts - 1];
915 + s->prev_ctb_x = prev_rs % de->pic_width_in_ctbs_y;
916 + s->prev_ctb_y = prev_rs / de->pic_width_in_ctbs_y;
918 if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
919 - wpp_decode_slice(de, s, sh, ctb_addr_ts);
920 + wpp_decode_slice(de, s);
922 - decode_slice(de, s, sh, ctb_addr_ts);
923 + decode_slice(de, s);
926 + if (!s->frame_end) {
927 + xtrace_ok(dev, de);
933 @@ -1776,8 +1800,9 @@ static void rpivid_h265_setup(struct rpi
935 * Need Aux ents for all (ref) DPB ents if temporal MV could
936 * be enabled for any pic
937 - * ** At the moment we have aux ents for all pics whether or not
939 + * ** At the moment we create aux ents for all pics whether or not
940 + * they are ref - they should then be discarded by the DPB-aux
941 + * garbage collection code
943 use_aux = ((s->sps.flags &
944 V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) != 0);
945 @@ -1795,7 +1820,7 @@ static void rpivid_h265_setup(struct rpi
948 // v4l2_info(&dev->v4l2_dev, "rpivid_h265_end of frame\n");
949 - if (frame_end(dev, de, s))
950 + if (write_cmd_buffer(dev, de, s))
953 for (i = 0; i < sh->num_active_dpb_entries; ++i) {
954 @@ -1876,6 +1901,7 @@ static void rpivid_h265_setup(struct rpi
957 de->state = RPIVID_DECODE_PHASE1;
958 + xtrace_ok(dev, de);
962 @@ -1883,6 +1909,7 @@ fail:
963 // Actual error reporting happens in Trigger
964 de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE :
965 RPIVID_DECODE_ERROR_CONTINUE;
966 + xtrace_fail(dev, de);
969 //////////////////////////////////////////////////////////////////////////////
970 @@ -2210,6 +2237,10 @@ static int rpivid_h265_start(struct rpiv
974 +#if DEBUG_TRACE_P1_CMD
978 // Generate a sanitised WxH for memory alloc
979 // Assume HD if unset