// SPDX-License-Identifier: MIT

#include <linux/slab.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_syncobj.h>

#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
#include "nouveau_dma.h"
#include "nouveau_exec.h"
#include "nouveau_abi16.h"
#include "nouveau_sched.h"

/* FIXME
 *
 * We want to make sure that jobs currently executing can't be deferred by
 * other jobs competing for the hardware. Otherwise we might end up with job
 * timeouts just because of too many clients submitting too many jobs. We don't
 * want jobs to time out because of system load, but because of the job being
 * too bulky.
 *
 * For now allow for up to 16 concurrent jobs in flight until we know how many
 * rings the hardware can process in parallel.
 */
#define NOUVEAU_SCHED_HW_SUBMISSIONS		16
#define NOUVEAU_SCHED_JOB_TIMEOUT_MS		10000

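/*
 * nouveau_job_init() - initialize a job from the submission arguments
 *
 * Copies the per-submission arguments into the job, duplicates the
 * user-provided in-/out-sync arrays, pre-allocates the syncobj and fence
 * chain arrays needed to attach out-fences later and initializes the
 * underlying DRM scheduler job. Synchronous jobs (args->sync) must not
 * carry any in- or out-syncs.
 */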
int
nouveau_job_init(struct nouveau_job *job,
		 struct nouveau_job_args *args)
{
	struct nouveau_sched_entity *entity = args->sched_entity;
	int ret;

	job->file_priv = args->file_priv;
	job->cli = nouveau_cli(args->file_priv);
	job->entity = entity;

	job->sync = args->sync;
	job->resv_usage = args->resv_usage;

	job->ops = args->ops;

	job->in_sync.count = args->in_sync.count;
	if (job->in_sync.count) {
		if (job->sync)
			return -EINVAL;

		job->in_sync.data = kmemdup(args->in_sync.s,
					    sizeof(*args->in_sync.s) *
					    args->in_sync.count,
					    GFP_KERNEL);
		if (!job->in_sync.data)
			return -ENOMEM;
	}

	job->out_sync.count = args->out_sync.count;
	if (job->out_sync.count) {
		if (job->sync) {
			ret = -EINVAL;
			goto err_free_in_sync;
		}

		job->out_sync.data = kmemdup(args->out_sync.s,
					     sizeof(*args->out_sync.s) *
					     args->out_sync.count,
					     GFP_KERNEL);
		if (!job->out_sync.data) {
			ret = -ENOMEM;
			goto err_free_in_sync;
		}

		job->out_sync.objs = kcalloc(job->out_sync.count,
					     sizeof(*job->out_sync.objs),
					     GFP_KERNEL);
		if (!job->out_sync.objs) {
			ret = -ENOMEM;
			goto err_free_out_sync;
		}

		job->out_sync.chains = kcalloc(job->out_sync.count,
					       sizeof(*job->out_sync.chains),
					       GFP_KERNEL);
		if (!job->out_sync.chains) {
			ret = -ENOMEM;
			goto err_free_objs;
		}
	}

	ret = drm_sched_job_init(&job->base, &entity->base, NULL);
	if (ret)
		goto err_free_chains;

	job->state = NOUVEAU_JOB_INITIALIZED;

	return 0;

err_free_chains:
	kfree(job->out_sync.chains);
err_free_objs:
	kfree(job->out_sync.objs);
err_free_out_sync:
	kfree(job->out_sync.data);
err_free_in_sync:
	kfree(job->in_sync.data);
	return ret;
}

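/* Free the duplicated in-/out-sync arrays and the out-sync bookkeeping. */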
void
nouveau_job_free(struct nouveau_job *job)
{
	kfree(job->in_sync.data);
	kfree(job->out_sync.data);
	kfree(job->out_sync.objs);
	kfree(job->out_sync.chains);
}

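/*
 * nouveau_job_fini() - tear down a job once the scheduler is done with it
 *
 * Drops the reference on the job's done fence, cleans up the DRM scheduler
 * job and hands the job back to its backend through the free() callback.
 */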
void nouveau_job_fini(struct nouveau_job *job)
{
	dma_fence_put(job->done_fence);
	drm_sched_job_cleanup(&job->base);
	job->ops->free(job);
}

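/*
 * Resolve a struct drm_nouveau_sync into a dma_fence. Both binary and
 * timeline syncobjs are supported; for timeline syncobjs the fence of the
 * requested timeline point is looked up.
 */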
static int
sync_find_fence(struct nouveau_job *job,
		struct drm_nouveau_sync *sync,
		struct dma_fence **fence)
{
	u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
	u64 point = 0;
	int ret;

	if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
	    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		return -EOPNOTSUPP;

	if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ)
		point = sync->timeline_value;

	ret = drm_syncobj_find_fence(job->file_priv,
				     sync->handle, point,
				     0 /* flags */, fence);
	if (ret)
		return ret;

	return 0;
}

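/*
 * Resolve all in-syncs of the job into fences and register them as
 * dependencies with the DRM scheduler, such that the job only runs once
 * they have signaled.
 */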
static int
nouveau_job_add_deps(struct nouveau_job *job)
{
	struct dma_fence *in_fence = NULL;
	int ret, i;

	for (i = 0; i < job->in_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->in_sync.data[i];

		ret = sync_find_fence(job, sync, &in_fence);
		if (ret) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> in): handle=%d\n",
				  sync->handle);
			return ret;
		}

		ret = drm_sched_job_add_dependency(&job->base, in_fence);
		if (ret)
			return ret;
	}

	return 0;
}

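/*
 * Undo nouveau_job_fence_attach_prepare(): drop the syncobj references and
 * free any pre-allocated fence chains.
 */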
static void
nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
{
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_syncobj *obj = job->out_sync.objs[i];
		struct dma_fence_chain *chain = job->out_sync.chains[i];

		if (obj)
			drm_syncobj_put(obj);

		if (chain)
			dma_fence_chain_free(chain);
	}
}

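/*
 * Look up the syncobj for every out-sync and pre-allocate a fence chain for
 * every timeline out-sync. Doing all fallible work up front guarantees that
 * attaching the out-fences after the job has been armed cannot fail.
 */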
static int
nouveau_job_fence_attach_prepare(struct nouveau_job *job)
{
	int i, ret;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			ret = -EINVAL;
			goto err_sync_cleanup;
		}

		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
		if (!*pobj) {
			NV_PRINTK(warn, job->cli,
				  "Failed to find syncobj (-> out): handle=%d\n",
				  sync->handle);
			ret = -ENOENT;
			goto err_sync_cleanup;
		}

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			*pchain = dma_fence_chain_alloc();
			if (!*pchain) {
				ret = -ENOMEM;
				goto err_sync_cleanup;
			}
		}
	}

	return 0;

err_sync_cleanup:
	nouveau_job_fence_attach_cleanup(job);
	return ret;
}

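/*
 * Attach the job's done fence to every out-sync: as a new timeline point for
 * timeline syncobjs, or as a replacement fence for binary syncobjs. Consumes
 * the references taken in nouveau_job_fence_attach_prepare().
 */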
static void
nouveau_job_fence_attach(struct nouveau_job *job)
{
	struct dma_fence *fence = job->done_fence;
	int i;

	for (i = 0; i < job->out_sync.count; i++) {
		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
		struct drm_syncobj **pobj = &job->out_sync.objs[i];
		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;

		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
			drm_syncobj_add_point(*pobj, *pchain, fence,
					      sync->timeline_value);
		} else {
			drm_syncobj_replace_fence(*pobj, fence);
		}

		drm_syncobj_put(*pobj);
		*pobj = NULL;
		*pchain = NULL;
	}
}

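/*
 * nouveau_job_submit() - submit an initialized job to the GPU scheduler
 *
 * Resolves the in-syncs into scheduler dependencies and prepares the
 * out-sync syncobjs and fence chains before taking the entity mutex, which
 * keeps jobs on the entity queue in submission order. The backend's
 * submit() callback is the last point of failure; once the job is armed,
 * the out-fences are attached and the job is pushed to the scheduler
 * entity. For synchronous jobs (job->sync) the call blocks until the done
 * fence has signaled.
 */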
int
nouveau_job_submit(struct nouveau_job *job)
{
	struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
	struct dma_fence *done_fence = NULL;
	int ret;

	ret = nouveau_job_add_deps(job);
	if (ret)
		goto err;

	ret = nouveau_job_fence_attach_prepare(job);
	if (ret)
		goto err;

	/* Make sure the job appears on the sched_entity's queue in the same
	 * order as it was submitted.
	 */
	mutex_lock(&entity->mutex);

	/* Guarantee we won't fail after the submit() callback returned
	 * successfully.
	 */
	if (job->ops->submit) {
		ret = job->ops->submit(job);
		if (ret)
			goto err_cleanup;
	}

	drm_sched_job_arm(&job->base);
	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
	if (job->sync)
		done_fence = dma_fence_get(job->done_fence);

	/* If a sched job depends on a dma-fence from a job from the same GPU
	 * scheduler instance, but a different scheduler entity, the GPU
	 * scheduler only waits for the particular job to be scheduled,
	 * rather than for the job to fully complete. This is due to the GPU
	 * scheduler assuming that there is a scheduler instance per ring.
	 * However, the current implementation, in order to avoid arbitrary
	 * amounts of kthreads, has a single scheduler instance while scheduler
	 * entities represent rings.
	 *
	 * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE flag on all
	 * out-fences in order to force the scheduler to wait for full job
	 * completion for dependent jobs from different entities and same
	 * scheduler instance.
	 *
	 * There is some work in progress [1] to address the issues of firmware
	 * schedulers; once it is in-tree the scheduler topology in Nouveau
	 * should be re-worked accordingly.
	 *
	 * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
	 */
	set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);

	if (job->ops->armed_submit)
		job->ops->armed_submit(job);

	nouveau_job_fence_attach(job);

	/* Set job state before pushing the job to the scheduler,
	 * such that we do not overwrite the job state set in run().
	 */
	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;

	drm_sched_entity_push_job(&job->base);

	mutex_unlock(&entity->mutex);

	if (done_fence) {
		dma_fence_wait(done_fence, true);
		dma_fence_put(done_fence);
	}

	return 0;

err_cleanup:
	mutex_unlock(&entity->mutex);
	nouveau_job_fence_attach_cleanup(job);
err:
	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
	return ret;
}

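/* Queue work on the entity's scheduler workqueue. */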
bool
nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
			   struct work_struct *work)
{
	return queue_work(entity->sched_wq, work);
}

static struct dma_fence *
nouveau_job_run(struct nouveau_job *job)
{
	struct dma_fence *fence;

	fence = job->ops->run(job);
	if (IS_ERR(fence))
		job->state = NOUVEAU_JOB_RUN_FAILED;
	else
		job->state = NOUVEAU_JOB_RUN_SUCCESS;

	return fence;
}

static struct dma_fence *
nouveau_sched_run_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	return nouveau_job_run(job);
}

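/*
 * Timeout handler: stop the scheduler, let the backend's timeout() callback
 * decide the scheduler status if one is provided, then restart the scheduler.
 */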
static enum drm_gpu_sched_stat
nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_gpu_scheduler *sched = sched_job->sched;
	struct nouveau_job *job = to_nouveau_job(sched_job);
	enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL;

	drm_sched_stop(sched, sched_job);

	if (job->ops->timeout)
		stat = job->ops->timeout(job);
	else
		NV_PRINTK(warn, job->cli, "Generic job timeout.\n");

	drm_sched_start(sched, true);

	return stat;
}

static void
nouveau_sched_free_job(struct drm_sched_job *sched_job)
{
	struct nouveau_job *job = to_nouveau_job(sched_job);

	nouveau_job_fini(job);
}

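/*
 * Initialize a nouveau scheduler entity: the submission mutex, the job list
 * and wait queue, and the underlying DRM scheduler entity at normal priority
 * on the given scheduler.
 */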
int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
			      struct drm_gpu_scheduler *sched,
			      struct workqueue_struct *sched_wq)
{
	mutex_init(&entity->mutex);
	spin_lock_init(&entity->job.list.lock);
	INIT_LIST_HEAD(&entity->job.list.head);
	init_waitqueue_head(&entity->job.wq);

	entity->sched_wq = sched_wq;
	return drm_sched_entity_init(&entity->base,
				     DRM_SCHED_PRIORITY_NORMAL,
				     &sched, 1, NULL);
}

void
nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
{
	drm_sched_entity_destroy(&entity->base);
}

static const struct drm_sched_backend_ops nouveau_sched_ops = {
	.run_job = nouveau_sched_run_job,
	.timedout_job = nouveau_sched_timedout_job,
	.free_job = nouveau_sched_free_job,
};

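/*
 * Set up the driver's single scheduler instance: a dedicated single-threaded
 * workqueue plus a DRM GPU scheduler limited to NOUVEAU_SCHED_HW_SUBMISSIONS
 * concurrent jobs with a job timeout of NOUVEAU_SCHED_JOB_TIMEOUT_MS.
 */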
int nouveau_sched_init(struct nouveau_drm *drm)
{
	struct drm_gpu_scheduler *sched = &drm->sched;
	long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);

	drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq");
	if (!drm->sched_wq)
		return -ENOMEM;

	return drm_sched_init(sched, &nouveau_sched_ops,
			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
			      NULL, NULL, "nouveau_sched", drm->dev->dev);
}

void nouveau_sched_fini(struct nouveau_drm *drm)
{
	destroy_workqueue(drm->sched_wq);
	drm_sched_fini(&drm->sched);
}