36cac3fba0888596c33e7215bb7ab642bdc79521
[openwrt/openwrt.git] /
1 From 45825b7a8fe40a6b8d03312cc433713b0cc4b63f Mon Sep 17 00:00:00 2001
2 From: Eric Anholt <eric@anholt.net>
3 Date: Fri, 30 Nov 2018 16:57:58 -0800
4 Subject: [PATCH 567/773] drm/v3d: Add more tracepoints for V3D GPU rendering.
5
6 The core scheduler tells us when the job is pushed to the scheduler's
7 queue, and I had the job_run functions saying when they actually queue
8 the job to the hardware. By adding tracepoints for the very top of
9 the ioctls and the IRQs signaling job completion, "perf record -a -e
10 v3d:.\* -e gpu_scheduler:.\* <job>; perf script" gets you a pretty
11 decent timeline.
12
13 Signed-off-by: Eric Anholt <eric@anholt.net>
14 Link: https://patchwork.freedesktop.org/patch/msgid/20181201005759.28093-5-eric@anholt.net
15 Reviewed-by: Dave Emett <david.emett@broadcom.com>
16 (cherry picked from commit 55a9b74846ed5e6219c7d81a8e1bf96f25d8ad5e)
17 ---
18 drivers/gpu/drm/v3d/v3d_gem.c | 4 ++
19 drivers/gpu/drm/v3d/v3d_irq.c | 19 +++++-
20 drivers/gpu/drm/v3d/v3d_trace.h | 101 ++++++++++++++++++++++++++++++++
21 3 files changed, 121 insertions(+), 3 deletions(-)
22
23 --- a/drivers/gpu/drm/v3d/v3d_gem.c
24 +++ b/drivers/gpu/drm/v3d/v3d_gem.c
25 @@ -521,6 +521,8 @@ v3d_submit_cl_ioctl(struct drm_device *d
26 struct drm_syncobj *sync_out;
27 int ret = 0;
28
29 + trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
30 +
31 if (args->pad != 0) {
32 DRM_INFO("pad must be zero: %d\n", args->pad);
33 return -EINVAL;
34 @@ -648,6 +650,8 @@ v3d_submit_tfu_ioctl(struct drm_device *
35 int ret = 0;
36 int bo_count;
37
38 + trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
39 +
40 job = kcalloc(1, sizeof(*job), GFP_KERNEL);
41 if (!job)
42 return -ENOMEM;
43 --- a/drivers/gpu/drm/v3d/v3d_irq.c
44 +++ b/drivers/gpu/drm/v3d/v3d_irq.c
45 @@ -15,6 +15,7 @@
46
47 #include "v3d_drv.h"
48 #include "v3d_regs.h"
49 +#include "v3d_trace.h"
50
51 #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \
52 V3D_INT_FLDONE | \
53 @@ -88,12 +89,20 @@ v3d_irq(int irq, void *arg)
54 }
55
56 if (intsts & V3D_INT_FLDONE) {
57 - dma_fence_signal(v3d->bin_job->bin.done_fence);
58 + struct v3d_fence *fence =
59 + to_v3d_fence(v3d->bin_job->bin.done_fence);
60 +
61 + trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
62 + dma_fence_signal(&fence->base);
63 status = IRQ_HANDLED;
64 }
65
66 if (intsts & V3D_INT_FRDONE) {
67 - dma_fence_signal(v3d->render_job->render.done_fence);
68 + struct v3d_fence *fence =
69 + to_v3d_fence(v3d->render_job->render.done_fence);
70 +
71 + trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
72 + dma_fence_signal(&fence->base);
73 status = IRQ_HANDLED;
74 }
75
76 @@ -119,7 +128,11 @@ v3d_hub_irq(int irq, void *arg)
77 V3D_WRITE(V3D_HUB_INT_CLR, intsts);
78
79 if (intsts & V3D_HUB_INT_TFUC) {
80 - dma_fence_signal(v3d->tfu_job->done_fence);
81 + struct v3d_fence *fence =
82 + to_v3d_fence(v3d->tfu_job->done_fence);
83 +
84 + trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
85 + dma_fence_signal(&fence->base);
86 status = IRQ_HANDLED;
87 }
88
89 --- a/drivers/gpu/drm/v3d/v3d_trace.h
90 +++ b/drivers/gpu/drm/v3d/v3d_trace.h
91 @@ -12,6 +12,28 @@
92 #define TRACE_SYSTEM v3d
93 #define TRACE_INCLUDE_FILE v3d_trace
94
95 +TRACE_EVENT(v3d_submit_cl_ioctl,
96 + TP_PROTO(struct drm_device *dev, u32 ct1qba, u32 ct1qea),
97 + TP_ARGS(dev, ct1qba, ct1qea),
98 +
99 + TP_STRUCT__entry(
100 + __field(u32, dev)
101 + __field(u32, ct1qba)
102 + __field(u32, ct1qea)
103 + ),
104 +
105 + TP_fast_assign(
106 + __entry->dev = dev->primary->index;
107 + __entry->ct1qba = ct1qba;
108 + __entry->ct1qea = ct1qea;
109 + ),
110 +
111 + TP_printk("dev=%u, RCL 0x%08x..0x%08x",
112 + __entry->dev,
113 + __entry->ct1qba,
114 + __entry->ct1qea)
115 +);
116 +
117 TRACE_EVENT(v3d_submit_cl,
118 TP_PROTO(struct drm_device *dev, bool is_render,
119 uint64_t seqno,
120 @@ -42,6 +64,85 @@ TRACE_EVENT(v3d_submit_cl,
121 __entry->ctnqea)
122 );
123
124 +TRACE_EVENT(v3d_bcl_irq,
125 + TP_PROTO(struct drm_device *dev,
126 + uint64_t seqno),
127 + TP_ARGS(dev, seqno),
128 +
129 + TP_STRUCT__entry(
130 + __field(u32, dev)
131 + __field(u64, seqno)
132 + ),
133 +
134 + TP_fast_assign(
135 + __entry->dev = dev->primary->index;
136 + __entry->seqno = seqno;
137 + ),
138 +
139 + TP_printk("dev=%u, seqno=%llu",
140 + __entry->dev,
141 + __entry->seqno)
142 +);
143 +
144 +TRACE_EVENT(v3d_rcl_irq,
145 + TP_PROTO(struct drm_device *dev,
146 + uint64_t seqno),
147 + TP_ARGS(dev, seqno),
148 +
149 + TP_STRUCT__entry(
150 + __field(u32, dev)
151 + __field(u64, seqno)
152 + ),
153 +
154 + TP_fast_assign(
155 + __entry->dev = dev->primary->index;
156 + __entry->seqno = seqno;
157 + ),
158 +
159 + TP_printk("dev=%u, seqno=%llu",
160 + __entry->dev,
161 + __entry->seqno)
162 +);
163 +
164 +TRACE_EVENT(v3d_tfu_irq,
165 + TP_PROTO(struct drm_device *dev,
166 + uint64_t seqno),
167 + TP_ARGS(dev, seqno),
168 +
169 + TP_STRUCT__entry(
170 + __field(u32, dev)
171 + __field(u64, seqno)
172 + ),
173 +
174 + TP_fast_assign(
175 + __entry->dev = dev->primary->index;
176 + __entry->seqno = seqno;
177 + ),
178 +
179 + TP_printk("dev=%u, seqno=%llu",
180 + __entry->dev,
181 + __entry->seqno)
182 +);
183 +
184 +TRACE_EVENT(v3d_submit_tfu_ioctl,
185 + TP_PROTO(struct drm_device *dev, u32 iia),
186 + TP_ARGS(dev, iia),
187 +
188 + TP_STRUCT__entry(
189 + __field(u32, dev)
190 + __field(u32, iia)
191 + ),
192 +
193 + TP_fast_assign(
194 + __entry->dev = dev->primary->index;
195 + __entry->iia = iia;
196 + ),
197 +
198 + TP_printk("dev=%u, IIA 0x%08x",
199 + __entry->dev,
200 + __entry->iia)
201 +);
202 +
203 TRACE_EVENT(v3d_submit_tfu,
204 TP_PROTO(struct drm_device *dev,
205 uint64_t seqno),