1f765aeb047dc0849e34ba993299d3f06bc58f11
[openwrt/staging/rmilecki.git] /
1 From a0bc59127231cbea506651c362d4836a0ff5591f Mon Sep 17 00:00:00 2001
2 From: Maxime Ripard <maxime@cerno.tech>
3 Date: Wed, 8 Sep 2021 21:12:26 +0200
4 Subject: [PATCH] drm/vc4: Fix out of order frames during asynchronous
5 page flips
6
7 When doing an asynchronous page flip (PAGE_FLIP ioctl with the
8 DRM_MODE_PAGE_FLIP_ASYNC flag set), the current code waits for any
9 pending GPU rendering of the buffer to finish through a call to
10 vc4_queue_seqno_cb().
11
12 On the BCM2835-37, the GPU driver is part of the vc4 driver and that
13 function is defined in vc4_gem.c to wait for the buffer to be rendered,
14 and once it's done, call a callback.
15
16 However, on the BCM2711 used on the RaspberryPi4, the GPU driver is
17 separate (v3d) and that function won't do anything. This was working
18 because we were going into a path, due to uninitialized variables, that
19 was always scheduling the callback.
20
21 However, we were never actually waiting for the buffer to be rendered,
22 which was resulting in frames being displayed out of order.
23
24 The generic API to signal this kind of completion in the kernel is the
25 DMA fence, and fortunately the v3d driver supports fences and signals
26 one when its job is done. That API also provides an equivalent function
27 that allows a callback to be executed when the fence is signalled as
28 done.
29
30 Let's change our driver a bit to rely on the previous function for the
31 older SoCs, and on DMA fences for the BCM2711.
32
33 Signed-off-by: Maxime Ripard <maxime@cerno.tech>
34 ---
35 drivers/gpu/drm/vc4/vc4_crtc.c | 37 ++++++++++++++++++++++++++++++++--
36 1 file changed, 35 insertions(+), 2 deletions(-)
37
38 --- a/drivers/gpu/drm/vc4/vc4_crtc.c
39 +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
40 @@ -797,6 +797,7 @@ struct vc4_async_flip_state {
41 struct drm_pending_vblank_event *event;
42
43 struct vc4_seqno_cb cb;
44 + struct dma_fence_cb fence_cb;
45 };
46
47 /* Called when the V3D execution for the BO being flipped to is done, so that
48 @@ -842,6 +843,39 @@ vc4_async_page_flip_complete(struct vc4_
49 kfree(flip_state);
50 }
51
52 +static void vc4_async_page_flip_fence_complete(struct dma_fence *fence,
53 + struct dma_fence_cb *cb)
54 +{
55 + struct vc4_async_flip_state *flip_state =
56 + container_of(cb, struct vc4_async_flip_state, fence_cb);
57 +
58 + vc4_async_page_flip_complete(&flip_state->cb);
59 + dma_fence_put(fence);
60 +}
61 +
62 +static int vc4_async_set_fence_cb(struct drm_device *dev,
63 + struct vc4_async_flip_state *flip_state)
64 +{
65 + struct drm_framebuffer *fb = flip_state->fb;
66 + struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
67 + struct vc4_dev *vc4 = to_vc4_dev(dev);
68 + struct dma_fence *fence;
69 +
70 + if (!vc4->hvs->hvs5) {
71 + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
72 +
73 + return vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
74 + vc4_async_page_flip_complete);
75 + }
76 +
77 + fence = dma_fence_get(dma_resv_excl_fence(cma_bo->base.resv));
78 + if (dma_fence_add_callback(fence, &flip_state->fence_cb,
79 + vc4_async_page_flip_fence_complete))
80 + vc4_async_page_flip_fence_complete(fence, &flip_state->fence_cb);
81 +
82 + return 0;
83 +}
84 +
85 /* Implements async (non-vblank-synced) page flips.
86 *
87 * The page flip ioctl needs to return immediately, so we grab the
88 @@ -902,8 +936,7 @@ static int vc4_async_page_flip(struct dr
89 */
90 drm_atomic_set_fb_for_plane(plane->state, fb);
91
92 - vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
93 - vc4_async_page_flip_complete);
94 + vc4_async_set_fence_cb(dev, flip_state);
95
96 /* Driver takes ownership of state on successful async commit. */
97 return 0;