// SPDX-License-Identifier: MIT

#include <linux/slab.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_syncobj.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_sched.h"

/* FIXME
 *
 * We want to make sure that jobs currently executing can't be deferred by
 * other jobs competing for the hardware. Otherwise we might end up with job
 * timeouts just because of too many clients submitting too many jobs. We don't
 * want jobs to time out because of system load, but because of the job being
 * too bulky.
 *
 * For now allow for up to 16 concurrent jobs in flight until we know how many
 * rings the hardware can process in parallel.
 */
#define NOUVEAU_SCHED_HW_SUBMISSIONS		16
#define NOUVEAU_SCHED_JOB_TIMEOUT_MS		10000

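/*
 * nouveau_job_init() - initialize a job from the submission arguments
 *
 * Copies the per-submission arguments into the job, duplicates the
 * user-provided in-/out-sync arrays, pre-allocates the syncobj and fence
 * chain arrays needed to attach out-fences later and initializes the
 * underlying DRM scheduler job. Synchronous jobs (args->sync) must not
 * carry any in- or out-syncs.
 */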
int
nouveau_job_init(struct nouveau_job *job,
		 struct nouveau_job_args *args)
{
	struct nouveau_sched_entity *entity = args->sched_entity;
	int ret;

	job->file_priv = args->file_priv;
	job->cli = nouveau_cli(args->file_priv);
	job->entity = entity;

	job->sync = args->sync;
	job->resv_usage = args->resv_usage;

	job->ops = args->ops;

	job->in_sync.count = args->in_sync.count;
	if (job->in_sync.count) {
		if (job->sync)
			return -EINVAL;

		job->in_sync.data = kmemdup(args->in_sync.s,
					    sizeof(*args->in_sync.s) *
					    args->in_sync.count,
					    GFP_KERNEL);
		if (!job->in_sync.data)
			return -ENOMEM;
	}

	job->out_sync.count = args->out_sync.count;
	if (job->out_sync.count) {
		if (job->sync) {
			ret = -EINVAL;
			goto err_free_in_sync;
		}

		job->out_sync.data = kmemdup(args->out_sync.s,
					     sizeof(*args->out_sync.s) *
					     args->out_sync.count,
					     GFP_KERNEL);
		if (!job->out_sync.data) {
			ret = -ENOMEM;
			goto err_free_in_sync;
		}

		job->out_sync.objs = kcalloc(job->out_sync.count,
					     sizeof(*job->out_sync.objs),
					     GFP_KERNEL);
		if (!job->out_sync.objs) {
			ret = -ENOMEM;
			goto err_free_out_sync;
		}

		job->out_sync.chains = kcalloc(job->out_sync.count,
					       sizeof(*job->out_sync.chains),
					       GFP_KERNEL);
		if (!job->out_sync.chains) {
			ret = -ENOMEM;
			goto err_free_objs;
		}
	}

	ret = drm_sched_job_init(&job->base, &entity->base, NULL);
	if (ret)
		goto err_free_chains;

	job->state = NOUVEAU_JOB_INITIALIZED;

	return 0;

err_free_chains:
	kfree(job->out_sync.chains);
err_free_objs:
	kfree(job->out_sync.objs);
err_free_out_sync:
	kfree(job->out_sync.data);
err_free_in_sync:
	kfree(job->in_sync.data);
	return ret;
}

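/* Free the duplicated in-/out-sync arrays and the out-sync bookkeeping. */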
void
nouveau_job_free(struct nouveau_job *job)
{
	kfree(job->in_sync.data);
	kfree(job->out_sync.data);
	kfree(job->out_sync.objs);
	kfree(job->out_sync.chains);
}

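/*
 * nouveau_job_fini() - tear down a job once the scheduler is done with it
 *
 * Drops the reference on the job's done fence, cleans up the DRM scheduler
 * job and hands the job back to its backend through the free() callback.
 */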
void nouveau_job_fini(struct nouveau_job *job)
{
	dma_fence_put(job->done_fence);
	drm_sched_job_cleanup(&job->base);
	job->ops->free(job);
}

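/*
 * Resolve a struct drm_nouveau_sync into a dma_fence. Both binary and
 * timeline syncobjs are supported; for timeline syncobjs the fence of the
 * requested timeline point is looked up.
 */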
static int
sync_find_fence(struct nouveau_job *job,
		struct drm_nouveau_sync *sync,
		struct dma_fence **fence)
{
	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
	u64 point = 0;
	int ret;

	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
	    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		return -EOPNOTSUPP;

	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		point = sync->timeline_value;

	ret = drm_syncobj_find_fence(job->file_priv,
				     sync->handle, point,
				     0 /* flags */, fence);
	if (ret)
		return ret;

	return 0;
}

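/*
 * Resolve all in-syncs of the job into fences and register them as
 * dependencies with the DRM scheduler, such that the job only runs once
 * they have signaled.
 */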
static int
nouveau_job_add_deps(struct nouveau_job *job)
{
	struct dma_fence *in_fence = NULL;
	int ret, i;

	for (i = 0; i < job->in_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->in_sync.data[i];

		ret = sync_find_fence(job, sync, &in_fence);
		if (ret) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> in): handle=%d\n",
				  sync->handle);
			return ret;
		}

		ret = drm_sched_job_add_dependency(&job->base, in_fence);
		if (ret)
			return ret;
	}

	return 0;
}

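/*
 * Undo nouveau_job_fence_attach_prepare(): drop the syncobj references and
 * free any pre-allocated fence chains.
 */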
static void
nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
{
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_syncobj *obj = job->out_sync.objs[i];
		struct dma_fence_chain *chain = job->out_sync.chains[i];

		if (obj)
			drm_syncobj_put(obj);

		if (chain)
			dma_fence_chain_free(chain);
	}
}

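/*
 * Look up the syncobj for every out-sync and pre-allocate a fence chain for
 * every timeline out-sync. Doing all fallible work up front guarantees that
 * attaching the out-fences after the job has been armed cannot fail.
 */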
static int
nouveau_job_fence_attach_prepare(struct nouveau_job *job)
{
	int i, ret;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			ret = -EINVAL;
			goto err_sync_cleanup;
		}

		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
		if (!*pobj) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> out): handle=%d\n",
				  sync->handle);
			ret = -ENOENT;
			goto err_sync_cleanup;
		}

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			*pchain = dma_fence_chain_alloc();
			if (!*pchain) {
				ret = -ENOMEM;
				goto err_sync_cleanup;
			}
		}
	}

	return 0;

err_sync_cleanup:
	nouveau_job_fence_attach_cleanup(job);
	return ret;
}

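/*
 * Attach the job's done fence to every out-sync: as a new timeline point for
 * timeline syncobjs, or as a replacement fence for binary syncobjs. Consumes
 * the references taken in nouveau_job_fence_attach_prepare().
 */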
static void
nouveau_job_fence_attach(struct nouveau_job *job)
{
	struct dma_fence *fence = job->done_fence;
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			drm_syncobj_add_point(*pobj, *pchain, fence,
					      sync->timeline_value);
		} else {
			drm_syncobj_replace_fence(*pobj, fence);
		}

		drm_syncobj_put(*pobj);
		*pobj = NULL;
		*pchain = NULL;
	}
}

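/*
 * nouveau_job_submit() - submit an initialized job to the GPU scheduler
 *
 * Resolves the in-syncs into scheduler dependencies and prepares the
 * out-sync syncobjs and fence chains before taking the entity mutex, which
 * keeps jobs on the entity queue in submission order. The backend's
 * submit() callback is the last point of failure; once the job is armed,
 * the out-fences are attached and the job is pushed to the scheduler
 * entity. For synchronous jobs (job->sync) the call blocks until the done
 * fence has signaled.
 */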
int
nouveau_job_submit(struct nouveau_job *job)
{
	struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
	struct dma_fence *done_fence = NULL;
	int ret;

	ret = nouveau_job_add_deps(job);
	if (ret)
		goto err;

	ret = nouveau_job_fence_attach_prepare(job);
	if (ret)
		goto err;

	/* Make sure the job appears on the sched_entity's queue in the same
	 * order as it was submitted.
	 */
	mutex_lock(&entity->mutex);

	/* Guarantee we won't fail after the submit() callback returned
	 * successfully.
	 */
	if (job->ops->submit) {
		ret = job->ops->submit(job);
		if (ret)
			goto err_cleanup;
	}

	drm_sched_job_arm(&job->base);
	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
	if (job->sync)
		done_fence = dma_fence_get(job->done_fence);

	/* If a sched job depends on a dma-fence from a job from the same GPU
	 * scheduler instance, but a different scheduler entity, the GPU
	 * scheduler only waits for the particular job to be scheduled,
	 * rather than for the job to fully complete. This is due to the GPU
	 * scheduler assuming that there is a scheduler instance per ring.
	 * However, the current implementation, in order to avoid arbitrary
	 * amounts of kthreads, has a single scheduler instance while scheduler
	 * entities represent rings.
	 *
	 * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE flag on all
	 * out-fences in order to force the scheduler to wait for full job
	 * completion for dependent jobs from different entities and same
	 * scheduler instance.
	 *
	 * There is some work in progress [1] to address the issues of firmware
	 * schedulers; once it is in-tree the scheduler topology in Nouveau
	 * should be re-worked accordingly.
	 *
	 * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
	 */
	set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);

	if (job->ops->armed_submit)
		job->ops->armed_submit(job);

	nouveau_job_fence_attach(job);

	/* Set job state before pushing the job to the scheduler,
	 * such that we do not overwrite the job state set in run().
	 */
	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&entity->mutex);

	if (done_fence) {
		dma_fence_wait(done_fence, true);
		dma_fence_put(done_fence);
	}

	return 0;

err_cleanup:
	mutex_unlock(&entity->mutex);
	nouveau_job_fence_attach_cleanup(job);
err:
	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
	return ret;
}

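/* Queue work on the entity's scheduler workqueue. */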
bool
nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
			   struct work_struct *work)
{
	return queue_work(entity->sched_wq, work);
}

static struct dma_fence *
nouveau_job_run(struct nouveau_job *job)
{
	struct dma_fence *fence;

	fence = job->ops->run(job);
	if (IS_ERR(fence))
		job->state = NOUVEAU_JOB_RUN_FAILED;
	else
		job->state = NOUVEAU_JOB_RUN_SUCCESS;

	return fence;
}

static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	return nouveau_job_run(job);
}

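/*
 * Timeout handler: stop the scheduler, let the backend's timeout() callback
 * decide the scheduler status if one is provided, then restart the scheduler.
 */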
static enum drm_gpu_sched_stat
nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	struct nouveau_job *job = to_nouveau_job(sched_job);
	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL;

	drm_sched_stop(sched, sched_job);

	if (job->ops->timeout)
		stat = job->ops->timeout(job);
	else
		NV_PRINTK(warn, job->cli, "Generic job timeout.\n");

	drm_sched_start(sched, true);

	return stat;
}

static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	nouveau_job_fini(job);
}

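/*
 * Initialize a nouveau scheduler entity: the submission mutex, the job list
 * and wait queue, and the underlying DRM scheduler entity at normal priority
 * on the given scheduler.
 */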
int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
			      struct drm_gpu_scheduler *sched,
			      struct workqueue_struct *sched_wq)
{
	mutex_init(&entity->mutex);
	spin_lock_init(&entity->job.list.lock);
	INIT_LIST_HEAD(&entity->job.list.head);
	init_waitqueue_head(&entity->job.wq);

	entity->sched_wq = sched_wq;
	return drm_sched_entity_init(&entity->base,
				     DRM_SCHED_PRIORITY_NORMAL,
				     &sched, 1, NULL);
}

void
nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
{
	drm_sched_entity_destroy(&entity->base);
}

static const struct drm_sched_backend_ops nouveau_sched_ops = {
	.run_job = nouveau_sched_run_job,
	.timedout_job = nouveau_sched_timedout_job,
	.free_job = nouveau_sched_free_job,
};

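/*
 * Set up the driver's single scheduler instance: a dedicated single-threaded
 * workqueue plus a DRM GPU scheduler limited to NOUVEAU_SCHED_HW_SUBMISSIONS
 * concurrent jobs with a job timeout of NOUVEAU_SCHED_JOB_TIMEOUT_MS.
 */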
int nouveau_sched_init(struct nouveau_drm *drm)
{
	struct drm_gpu_scheduler *sched = &drm->sched;
	long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);

	drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq");
	if (!drm->sched_wq)
		return -ENOMEM;

	return drm_sched_init(sched, &nouveau_sched_ops,
			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
			      NULL, NULL, "nouveau_sched", drm->dev->dev);
}

void nouveau_sched_fini(struct nouveau_drm *drm)
{
	destroy_workqueue(drm->sched_wq);
	drm_sched_fini(&drm->sched);
}