[thirdparty/linux.git] / drivers / gpu / drm / i915 / i915_scheduler.c

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_globals.h"
#include "i915_request.h"
#include "i915_scheduler.h"

/*
 * Module-wide scheduler state: the i915_global hook plus the two slab caches
 * this file allocates from. This is a tentative definition of 'global'; the
 * initialised definition (which fills in the i915_global callbacks) appears
 * further down in this file.
 */
static struct i915_global_scheduler {
	struct i915_global base;
	/* Backing cache for i915_dependency_alloc()/i915_dependency_free() */
	struct kmem_cache *slab_dependencies;
	/* Backing cache for non-default i915_priolist nodes */
	struct kmem_cache *slab_priorities;
} global;

/*
 * Serialises updates to the dependency lists and priority propagation.
 * __i915_schedule() asserts that it is held because it borrows the
 * temporary dfs_link embedded in each i915_dependency.
 */
static DEFINE_SPINLOCK(schedule_lock);

/* Map a scheduling node back to the i915_request that embeds it as 'sched'. */
static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
	const struct i915_request *rq;

	rq = container_of(node, const struct i915_request, sched);
	return rq;
}

/* Report i915_request_started() for the request that owns @node. */
static inline bool node_started(const struct i915_sched_node *node)
{
	const struct i915_request *rq = node_to_request(node);

	return i915_request_started(rq);
}

/* Report i915_request_completed() for the request that owns @node. */
static inline bool node_signaled(const struct i915_sched_node *node)
{
	const struct i915_request *rq = node_to_request(node);

	return i915_request_completed(rq);
}

/* Convert an rbtree node into the i915_priolist that embeds it as 'node'. */
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
	struct i915_priolist *pl;

	pl = rb_entry(rb, struct i915_priolist, node);
	return pl;
}

/*
 * Debug-only consistency check of the execlists priority tree; returns
 * immediately unless CONFIG_DRM_I915_DEBUG_GEM is enabled.
 *
 * Checks that the cached leftmost node agrees with the real leftmost node,
 * that priorities strictly decrease along an in-order walk, that every
 * priolist in the tree has at least one 'used' bit, and that each non-empty
 * request bucket has its bit set in 'used'.
 */
static void assert_priolists(struct intel_engine_execlists * const execlists)
{
	struct rb_node *it;
	long ceiling, slot;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	GEM_BUG_ON(rb_first(&execlists->queue.rb_root) !=
		   rb_first_cached(&execlists->queue));

	/* Seed with one more than INT_MAX >> shift as the initial upper bound */
	ceiling = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;

	it = rb_first_cached(&execlists->queue);
	while (it) {
		const struct i915_priolist *pl = to_priolist(it);

		/* Each node must be strictly lower than its predecessor */
		GEM_BUG_ON(pl->priority >= ceiling);
		ceiling = pl->priority;

		GEM_BUG_ON(!pl->used);
		for (slot = 0; slot < ARRAY_SIZE(pl->requests); slot++)
			GEM_BUG_ON(!list_empty(&pl->requests[slot]) &&
				   !(pl->used & BIT(slot)));

		it = rb_next(it);
	}
}

/*
 * Find, or create, the priolist for @prio in @engine's execlists queue and
 * return the request bucket inside it that @prio selects.
 *
 * The low I915_PRIORITY_MASK bits of @prio pick the bucket within the
 * priolist (highest sub-level in slot 0); the value shifted down by
 * I915_USER_PRIORITY_SHIFT is the key used to order the rbtree. The chosen
 * bucket is flagged in the priolist's 'used' mask before it is returned.
 *
 * The caller must hold engine->active.lock. The lookup never fails: if a new
 * priolist cannot be allocated, priority sorting is disabled for this engine
 * (execlists->no_priolist) and the embedded I915_PRIORITY_NORMAL priolist is
 * used instead.
 */
struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_priolist *p;
	struct rb_node **parent, *rb;
	bool first;
	int idx, i;

	lockdep_assert_held(&engine->active.lock);
	assert_priolists(execlists);

	/* buckets sorted from highest [in slot 0] to lowest priority */
	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
	prio >>= I915_USER_PRIORITY_SHIFT;
	if (unlikely(execlists->no_priolist))
		prio = I915_PRIORITY_NORMAL;

find_priolist:
	/* most positive priority is scheduled first, equal priorities fifo */
	rb = NULL;
	/*
	 * Track whether this walk only ever descends left, i.e. whether a
	 * newly inserted node becomes the leftmost. This must be reset for
	 * every walk: after an allocation failure we retry with a different
	 * priority, and a stale 'false' from the first walk would leave the
	 * cached leftmost pointer wrong if the retry lands leftmost.
	 */
	first = true;
	parent = &execlists->queue.rb_root.rb_node;
	while (*parent) {
		rb = *parent;
		p = to_priolist(rb);
		if (prio > p->priority) {
			parent = &rb->rb_left;
		} else if (prio < p->priority) {
			parent = &rb->rb_right;
			first = false;
		} else {
			goto out;
		}
	}

	if (prio == I915_PRIORITY_NORMAL) {
		/* The normal level uses the preallocated, embedded priolist */
		p = &execlists->default_priolist;
	} else {
		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
		/* Convert an allocation failure to a priority bump */
		if (unlikely(!p)) {
			prio = I915_PRIORITY_NORMAL; /* recurses just once */

			/* To maintain ordering with all rendering, after an
			 * allocation failure we have to disable all scheduling.
			 * Requests will then be executed in fifo, and schedule
			 * will ensure that dependencies are emitted in fifo.
			 * There will be still some reordering with existing
			 * requests, so if userspace lied about their
			 * dependencies that reordering may be visible.
			 */
			execlists->no_priolist = true;
			goto find_priolist;
		}
	}

	/* Initialise the new priolist and link it at the position found */
	p->priority = prio;
	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
		INIT_LIST_HEAD(&p->requests[i]);
	rb_link_node(&p->node, rb, parent);
	rb_insert_color_cached(&p->node, &execlists->queue, first);
	p->used = 0;

out:
	p->used |= BIT(idx);
	return &p->requests[idx];
}

/* Return a priolist to the slab_priorities cache. */
void __i915_priolist_free(struct i915_priolist *p)
{
	struct kmem_cache * const cache = global.slab_priorities;

	kmem_cache_free(cache, p);
}

/*
 * Scratch state carried across iterations of __i915_schedule(). It is
 * cleared by sched_lock_engine() whenever the held engine lock changes.
 */
struct sched_cache {
	/* Last bucket returned by i915_sched_lookup_priolist(), or NULL */
	struct list_head *priolist;
};

/*
 * Make sure the active.lock held by the caller is the one belonging to the
 * engine that currently owns @node's request.
 *
 * @locked is the engine whose active.lock is held on entry (must not be
 * NULL). If the request's engine differs, that lock is dropped, @cache is
 * wiped, and the request's engine is locked instead; this repeats until the
 * two agree. Returns the engine whose active.lock is held on exit.
 */
static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node *node,
		  struct intel_engine_cs *locked,
		  struct sched_cache *cache)
{
	const struct i915_request *rq = node_to_request(node);
	struct intel_engine_cs *engine;

	GEM_BUG_ON(!locked);

	/*
	 * With virtual engines, rq->engine is only stable while holding that
	 * engine's lock. So: sample it, take the lock, then sample again to
	 * confirm the request still lives on the engine we just locked.
	 */
	for (;;) {
		engine = READ_ONCE(rq->engine);
		if (engine == locked)
			break;

		spin_unlock(&locked->active.lock);
		memset(cache, 0, sizeof(*cache));
		spin_lock(&engine->active.lock);
		locked = engine;
	}

	GEM_BUG_ON(locked != engine);
	return locked;
}

/* The request's current priority with the __NO_PREEMPTION bits forced on. */
static inline int rq_prio(const struct i915_request *rq)
{
	const int prio = rq->sched.attr.priority;

	return prio | __NO_PREEMPTION;
}

/*
 * Decide whether a request at @prio should preempt one running at @active.
 *
 * Preemption is permitted along low -> normal -> high, but a low priority
 * task is never allowed to preempt another low priority task: latency is
 * assumed to matter less than background throughput for those, so the
 * threshold is never below I915_PRIORITY_NORMAL.
 */
static inline bool need_preempt(int prio, int active)
{
	int threshold = active;

	if (threshold < I915_PRIORITY_NORMAL)
		threshold = I915_PRIORITY_NORMAL;

	return prio >= threshold;
}

/*
 * After raising @rq to @prio, decide whether the execlists tasklet on
 * @engine needs to be scheduled so that it can consider preemption.
 *
 * Does nothing if @prio does not exceed the engine's queue_priority_hint,
 * or if no request is currently active. Otherwise the hint is raised to
 * @prio and, unless the active request shares @rq's context, the tasklet is
 * scheduled when need_preempt() says so.
 */
static void kick_submission(struct intel_engine_cs *engine,
			    const struct i915_request *rq,
			    int prio)
{
	const struct i915_request *inflight;

	/*
	 * The tasklet only needs kicking once per new high priority
	 * context added to the queue.
	 */
	if (prio <= engine->execlists.queue_priority_hint)
		return;

	rcu_read_lock();

	/* Nothing currently active? We're overdue for a submission! */
	inflight = execlists_active(&engine->execlists);
	if (inflight) {
		engine->execlists.queue_priority_hint = prio;

		/*
		 * No point evaluating preemption against ourselves if our
		 * context is the one already executing.
		 */
		if (inflight->context != rq->context &&
		    need_preempt(prio, rq_prio(inflight)))
			tasklet_hi_schedule(&engine->execlists.tasklet);
	}

	rcu_read_unlock();
}

/*
 * Raise the priority of @node, and of every not-yet-completed request it
 * (transitively) depends upon, to at least @attr->priority.
 *
 * Priorities are only ever raised here: the target is the maximum of the
 * requested and the node's current priority, and any node already at or
 * above the target is skipped.
 *
 * The caller must hold schedule_lock. The engine active.lock is acquired
 * internally (and swapped between engines via sched_lock_engine()) while
 * priorities are written and queued requests are moved between priolists.
 */
static void __i915_schedule(struct i915_sched_node *node,
			    const struct i915_sched_attr *attr)
{
	const int prio = max(attr->priority, node->attr.priority);
	struct intel_engine_cs *engine;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	struct sched_cache cache;
	LIST_HEAD(dfs);

	/* Needed in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&schedule_lock);
	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

	if (node_signaled(node))
		return;

	/* Seed the walk with an on-stack pseudo-dependency on @node itself */
	stack.signaler = node;
	list_add(&stack.dfs_link, &dfs);

	/*
	 * Recursively bump all dependent priorities to match the new request.
	 *
	 * A naive approach would be to use recursion:
	 * static void update_priorities(struct i915_sched_node *node, prio) {
	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
	 *		update_priorities(dep->signal, prio)
	 *	queue_request(node);
	 * }
	 * but that may have unlimited recursion depth and so runs a very
	 * real risk of overrunning the kernel stack. Instead, we build
	 * a flat list of all dependencies starting with the current request.
	 * As we walk the list of dependencies, we add all of its dependencies
	 * to the end of the list (this may include an already visited
	 * request) and continue to walk onwards onto the new dependencies. The
	 * end result is a topological list of requests in reverse order, the
	 * last element in the list is the request we must execute first.
	 */
	list_for_each_entry(dep, &dfs, dfs_link) {
		/* Note: shadows the function parameter within this loop */
		struct i915_sched_node *node = dep->signaler;

		/* If we are already flying, we know we have no signalers */
		if (node_started(node))
			continue;

		/*
		 * Within an engine, there can be no cycle, but we may
		 * refer to the same dependency chain multiple times
		 * (redundant dependencies are not eliminated) and across
		 * engines.
		 */
		list_for_each_entry(p, &node->signalers_list, signal_link) {
			GEM_BUG_ON(p == dep); /* no cycles! */

			if (node_signaled(p->signaler))
				continue;

			/* Only queue signalers that are below the target */
			if (prio > READ_ONCE(p->signaler->attr.priority))
				list_move_tail(&p->dfs_link, &dfs);
		}
	}

	/*
	 * If we didn't need to bump any existing priorities, and we haven't
	 * yet submitted this request (i.e. there is no potential race with
	 * execlists_submit_request()), we can set our own priority and skip
	 * acquiring the engine locks.
	 */
	if (node->attr.priority == I915_PRIORITY_INVALID) {
		GEM_BUG_ON(!list_empty(&node->link));
		node->attr = *attr;

		/* The seed is the only entry on dfs: nothing else to bump */
		if (stack.dfs_link.next == stack.dfs_link.prev)
			return;

		/* Our own priority is set; drop the seed, keep the signalers */
		__list_del_entry(&stack.dfs_link);
	}

	memset(&cache, 0, sizeof(cache));
	engine = node_to_request(node)->engine;
	spin_lock(&engine->active.lock);

	/* Fifo and depth-first replacement ensure our deps execute before us */
	engine = sched_lock_engine(node, engine, &cache);
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		/* Detach from dfs so the dependency's link is reusable */
		INIT_LIST_HEAD(&dep->dfs_link);

		node = dep->signaler;
		engine = sched_lock_engine(node, engine, &cache);
		lockdep_assert_held(&engine->active.lock);

		/* Recheck after acquiring the engine->timeline.lock */
		if (prio <= node->attr.priority || node_signaled(node))
			continue;

		GEM_BUG_ON(node_to_request(node)->engine != engine);

		WRITE_ONCE(node->attr.priority, prio);

		/*
		 * Once the request is ready, it will be placed into the
		 * priority lists and then onto the HW runlist. Before the
		 * request is ready, it does not contribute to our preemption
		 * decisions and we can safely ignore it, as it will, and
		 * any preemption required, be dealt with upon submission.
		 * See engine->submit_request()
		 */
		if (list_empty(&node->link))
			continue;

		if (i915_request_in_priority_queue(node_to_request(node))) {
			/* Look the bucket up once per engine, then reuse it */
			if (!cache.priolist)
				cache.priolist =
					i915_sched_lookup_priolist(engine,
								   prio);
			list_move_tail(&node->link, cache.priolist);
		}

		/* Defer (tasklet) submission until after all of our updates. */
		kick_submission(engine, node_to_request(node), prio);
	}

	spin_unlock(&engine->active.lock);
}

/*
 * Apply the scheduling attributes @attr to @rq, propagating the priority to
 * its dependencies. Takes schedule_lock with interrupts disabled around the
 * call to __i915_schedule().
 */
void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
	struct i915_sched_node * const node = &rq->sched;

	spin_lock_irq(&schedule_lock);
	__i915_schedule(node, attr);
	spin_unlock_irq(&schedule_lock);
}

static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
{
	struct i915_sched_attr attr = node->attr;

	if (attr.priority & bump)
		return;

	attr.priority |= bump;
	__i915_schedule(node, &attr);
}

/*
 * Set the internal priority bits @bump on @rq. @bump must lie within
 * I915_PRIORITY_MASK. Saves and restores the interrupt state around
 * schedule_lock.
 */
void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
	unsigned long irqflags;

	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);

	/* Lockless peek: skip the lock if any requested bit is already set */
	if (!(READ_ONCE(rq->sched.attr.priority) & bump)) {
		spin_lock_irqsave(&schedule_lock, irqflags);
		__bump_priority(&rq->sched, bump);
		spin_unlock_irqrestore(&schedule_lock, irqflags);
	}
}

/*
 * First-time initialisation of a scheduling node: set up its three list
 * heads as empty and then apply the common reset in
 * i915_sched_node_reinit().
 */
void i915_sched_node_init(struct i915_sched_node *node)
{
	/* Position in a priolist bucket, once queued */
	INIT_LIST_HEAD(&node->link);

	/* Dependency edges in both directions */
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->signalers_list);

	i915_sched_node_reinit(node);
}

/*
 * Reset a scheduling node for (re)use: priority goes back to
 * I915_PRIORITY_INVALID and the semaphore count and flags are cleared.
 * The node's lists must already be empty.
 */
void i915_sched_node_reinit(struct i915_sched_node *node)
{
	/* A node being recycled must not still be linked anywhere */
	GEM_BUG_ON(!list_empty(&node->link));
	GEM_BUG_ON(!list_empty(&node->waiters_list));
	GEM_BUG_ON(!list_empty(&node->signalers_list));

	node->flags = 0;
	node->semaphores = 0;
	node->attr.priority = I915_PRIORITY_INVALID;
}

/*
 * Allocate one i915_dependency from the slab_dependencies cache using
 * GFP_KERNEL. Returns whatever kmem_cache_alloc() returns; the caller in
 * this file treats NULL as allocation failure.
 */
static struct i915_dependency *
i915_dependency_alloc(void)
{
	struct i915_dependency *dep;

	dep = kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
	return dep;
}

/* Hand an i915_dependency back to the slab_dependencies cache. */
static void
i915_dependency_free(struct i915_dependency *dep)
{
	struct kmem_cache * const cache = global.slab_dependencies;

	kmem_cache_free(cache, dep);
}

/*
 * Record that @node must wait for @signal, using the caller-provided @dep
 * as the link between them.
 *
 * Returns true if @dep was published on both nodes' lists, or false if
 * @signal had already completed and @dep was left untouched (the caller
 * keeps ownership of it in that case).
 */
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
				      struct i915_sched_node *signal,
				      struct i915_dependency *dep,
				      unsigned long flags)
{
	bool added = false;

	spin_lock_irq(&schedule_lock);

	/* Nothing to track once the signaler has already completed */
	if (node_signaled(signal))
		goto unlock;

	INIT_LIST_HEAD(&dep->dfs_link);
	dep->signaler = signal;
	dep->waiter = node;
	dep->flags = flags;

	/* Propagate "someone on this chain uses a semaphore" to the waiter */
	if ((signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) &&
	    !node_started(signal))
		node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;

	/* Fully initialised: publish. Lockless walkers may see it now. */
	list_add_rcu(&dep->signal_link, &node->signalers_list);
	list_add_rcu(&dep->wait_link, &signal->waiters_list);

	/*
	 * WAIT is not allowed to preempt inflight requests, so once a
	 * request has executed (and triggered its execution callbacks)
	 * its position within the non-preemptible FIFO must be kept.
	 */
	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
	if (flags & I915_DEPENDENCY_EXTERNAL)
		__bump_priority(signal, __NO_PREEMPTION);

	added = true;

unlock:
	spin_unlock_irq(&schedule_lock);

	return added;
}

/*
 * Allocate a dependency and use it to make @node wait upon @signal.
 *
 * Returns 0 on success, including the case where @signal had already
 * completed and no dependency was needed, or -ENOMEM if the dependency
 * could not be allocated. The dependency is tagged I915_DEPENDENCY_ALLOC
 * so that it is freed when the nodes are torn down.
 */
int i915_sched_node_add_dependency(struct i915_sched_node *node,
				   struct i915_sched_node *signal,
				   unsigned long flags)
{
	struct i915_dependency *dep = i915_dependency_alloc();
	bool added;

	if (!dep)
		return -ENOMEM;

	added = __i915_sched_node_add_dependency(node, signal, dep,
						 flags | I915_DEPENDENCY_ALLOC);
	if (!added)
		i915_dependency_free(dep); /* never published, still ours */

	return 0;
}

/*
 * Tear down all dependency edges attached to @node, in both directions,
 * under schedule_lock. Each dependency is unlinked from the *other* node's
 * list with list_del_rcu() and freed if it carries I915_DEPENDENCY_ALLOC;
 * @node's own list heads are then reinitialised as empty.
 */
void i915_sched_node_fini(struct i915_sched_node *node)
{
	struct i915_dependency *dep, *tmp;

	spin_lock_irq(&schedule_lock);

	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		/* Must not be in the middle of a __i915_schedule() walk */
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		/* Unhook from the signaler's waiters_list */
		list_del_rcu(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->signalers_list);

	/* Remove ourselves from everyone who depends upon us */
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

		/* Unhook from the waiter's signalers_list */
		list_del_rcu(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(dep);
	}
	INIT_LIST_HEAD(&node->waiters_list);

	spin_unlock_irq(&schedule_lock);
}

/* i915_global 'shrink' callback: shrink both scheduler slab caches. */
static void i915_global_scheduler_shrink(void)
{
	struct kmem_cache * const deps = global.slab_dependencies;
	struct kmem_cache * const prios = global.slab_priorities;

	kmem_cache_shrink(deps);
	kmem_cache_shrink(prios);
}

/* i915_global 'exit' callback: destroy both scheduler slab caches. */
static void i915_global_scheduler_exit(void)
{
	struct kmem_cache * const deps = global.slab_dependencies;
	struct kmem_cache * const prios = global.slab_priorities;

	kmem_cache_destroy(deps);
	kmem_cache_destroy(prios);
}

/*
 * Initialised definition of the scheduler globals. Only the callbacks of the
 * embedded i915_global (the first member) are set here; the slab cache
 * pointers are filled in by i915_global_scheduler_init().
 */
static struct i915_global_scheduler global = { {
	.shrink = i915_global_scheduler_shrink,
	.exit = i915_global_scheduler_exit,
} };

/*
 * Create the slab caches used by the scheduler and register the scheduler's
 * i915_global hooks.
 *
 * Returns 0 on success or -ENOMEM if either cache could not be created. On
 * failure no cache is left allocated.
 */
int __init i915_global_scheduler_init(void)
{
	global.slab_dependencies = KMEM_CACHE(i915_dependency,
					      SLAB_HWCACHE_ALIGN |
					      SLAB_TYPESAFE_BY_RCU);
	if (!global.slab_dependencies)
		return -ENOMEM;

	global.slab_priorities = KMEM_CACHE(i915_priolist,
					    SLAB_HWCACHE_ALIGN);
	if (!global.slab_priorities)
		goto err_priorities;

	i915_global_register(&global.base);
	return 0;

err_priorities:
	/*
	 * slab_priorities failed to allocate (it is NULL here), so the cache
	 * that needs unwinding is the one created before it.
	 */
	kmem_cache_destroy(global.slab_dependencies);
	return -ENOMEM;
}
Commit	Line	Data
e2f3496e CW	1	/*
	2	* SPDX-License-Identifier: MIT
	3	*
	4	* Copyright © 2018 Intel Corporation
	5	*/
	6
	7	#include <linux/mutex.h>
	8
	9	#include "i915_drv.h"
103b76ee	10	#include "i915_globals.h"
e2f3496e CW	11	#include "i915_request.h"
	12	#include "i915_scheduler.h"
	13
32eb6bcf	14	static struct i915_global_scheduler {
103b76ee	15	struct i915_global base;
32eb6bcf CW	16	struct kmem_cache *slab_dependencies;
	17	struct kmem_cache *slab_priorities;
	18	} global;
	19
e2f3496e CW	20	static DEFINE_SPINLOCK(schedule_lock);
	21
	22	static const struct i915_request *
	23	node_to_request(const struct i915_sched_node *node)
	24	{
	25	return container_of(node, const struct i915_request, sched);
	26	}
	27
babfb1b5 CW	28	static inline bool node_started(const struct i915_sched_node *node)
	29	{
	30	return i915_request_started(node_to_request(node));
	31	}
	32
e2f3496e CW	33	static inline bool node_signaled(const struct i915_sched_node *node)
	34	{
	35	return i915_request_completed(node_to_request(node));
	36	}
	37
e2f3496e CW	38	static inline struct i915_priolist to_priolist(struct rb_node rb)
	39	{
	40	return rb_entry(rb, struct i915_priolist, node);
	41	}
	42
4d97cbe0	43	static void assert_priolists(struct intel_engine_execlists * const execlists)
e2f3496e CW	44	{
	45	struct rb_node *rb;
	46	long last_prio, i;
	47
	48	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
	49	return;
	50
	51	GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
	52	rb_first(&execlists->queue.rb_root));
	53
4d97cbe0	54	last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
e2f3496e CW	55	for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
	56	const struct i915_priolist *p = to_priolist(rb);
	57
	58	GEM_BUG_ON(p->priority >= last_prio);
	59	last_prio = p->priority;
	60
	61	GEM_BUG_ON(!p->used);
	62	for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
	63	if (list_empty(&p->requests[i]))
	64	continue;
	65
	66	GEM_BUG_ON(!(p->used & BIT(i)));
	67	}
	68	}
	69	}
	70
	71	struct list_head *
	72	i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
	73	{
	74	struct intel_engine_execlists * const execlists = &engine->execlists;
	75	struct i915_priolist *p;
	76	struct rb_node *parent, rb;
	77	bool first = true;
	78	int idx, i;
	79
422d7df4	80	lockdep_assert_held(&engine->active.lock);
4d97cbe0	81	assert_priolists(execlists);
e2f3496e CW	82
	83	/* buckets sorted from highest [in slot 0] to lowest priority */
	84	idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
	85	prio >>= I915_USER_PRIORITY_SHIFT;
	86	if (unlikely(execlists->no_priolist))
	87	prio = I915_PRIORITY_NORMAL;
	88
	89	find_priolist:
	90	/* most positive priority is scheduled first, equal priorities fifo */
	91	rb = NULL;
	92	parent = &execlists->queue.rb_root.rb_node;
	93	while (*parent) {
	94	rb = *parent;
	95	p = to_priolist(rb);
	96	if (prio > p->priority) {
	97	parent = &rb->rb_left;
	98	} else if (prio < p->priority) {
	99	parent = &rb->rb_right;
	100	first = false;
	101	} else {
	102	goto out;
	103	}
	104	}
	105
	106	if (prio == I915_PRIORITY_NORMAL) {
	107	p = &execlists->default_priolist;
	108	} else {
32eb6bcf	109	p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
e2f3496e CW	110	/* Convert an allocation failure to a priority bump */
	111	if (unlikely(!p)) {
	112	prio = I915_PRIORITY_NORMAL; /* recurses just once */
	113
	114	/* To maintain ordering with all rendering, after an
	115	* allocation failure we have to disable all scheduling.
	116	* Requests will then be executed in fifo, and schedule
	117	* will ensure that dependencies are emitted in fifo.
	118	* There will be still some reordering with existing
	119	* requests, so if userspace lied about their
	120	* dependencies that reordering may be visible.
	121	*/
	122	execlists->no_priolist = true;
	123	goto find_priolist;
	124	}
	125	}
	126
	127	p->priority = prio;
	128	for (i = 0; i < ARRAY_SIZE(p->requests); i++)
	129	INIT_LIST_HEAD(&p->requests[i]);
	130	rb_link_node(&p->node, rb, parent);
	131	rb_insert_color_cached(&p->node, &execlists->queue, first);
	132	p->used = 0;
	133
	134	out:
	135	p->used \|= BIT(idx);
	136	return &p->requests[idx];
	137	}
	138
5ae87063 CW	139	void __i915_priolist_free(struct i915_priolist *p)
	140	{
	141	kmem_cache_free(global.slab_priorities, p);
	142	}
	143
ed7dc677 CW	144	struct sched_cache {
	145	struct list_head *priolist;
	146	};
	147
e2f3496e	148	static struct intel_engine_cs *
ed7dc677 CW	149	sched_lock_engine(const struct i915_sched_node *node,
	150	struct intel_engine_cs *locked,
	151	struct sched_cache *cache)
e2f3496e	152	{
6d06779e CW	153	const struct i915_request *rq = node_to_request(node);
6d06779e CW	154	struct intel_engine_cs *engine;
e2f3496e CW	155
	156	GEM_BUG_ON(!locked);
	157
6d06779e CW	158	/*
	159	* Virtual engines complicate acquiring the engine timeline lock,
	160	* as their rq->engine pointer is not stable until under that
	161	* engine lock. The simple ploy we use is to take the lock then
	162	* check that the rq still belongs to the newly locked engine.
	163	*/
	164	while (locked != (engine = READ_ONCE(rq->engine))) {
422d7df4	165	spin_unlock(&locked->active.lock);
ed7dc677	166	memset(cache, 0, sizeof(*cache));
422d7df4	167	spin_lock(&engine->active.lock);
6d06779e	168	locked = engine;
e2f3496e CW	169	}
e2f3496e CW	170
6d06779e CW	171	GEM_BUG_ON(locked != engine);
6d06779e CW	172	return locked;
e2f3496e CW	173	}
e2f3496e CW	174
25d851ad	175	static inline int rq_prio(const struct i915_request *rq)
c9a64622	176	{
25d851ad CW	177	return rq->sched.attr.priority \| __NO_PREEMPTION;
	178	}
	179
253a774b CW	180	static inline bool need_preempt(int prio, int active)
	181	{
	182	/*
	183	* Allow preemption of low -> normal -> high, but we do
	184	* not allow low priority tasks to preempt other low priority
	185	* tasks under the impression that latency for low priority
	186	* tasks does not matter (as much as background throughput),
	187	* so kiss.
	188	*/
	189	return prio >= max(I915_PRIORITY_NORMAL, active);
	190	}
	191
13ed13a4 CW	192	static void kick_submission(struct intel_engine_cs *engine,
	193	const struct i915_request *rq,
	194	int prio)
25d851ad	195	{
13ed13a4 CW	196	const struct i915_request *inflight;
	197
	198	/*
	199	* We only need to kick the tasklet once for the high priority
	200	* new context we add into the queue.
	201	*/
	202	if (prio <= engine->execlists.queue_priority_hint)
	203	return;
	204
7d148635 CW	205	rcu_read_lock();
7d148635 CW	206
13ed13a4 CW	207	/* Nothing currently active? We're overdue for a submission! */
	208	inflight = execlists_active(&engine->execlists);
	209	if (!inflight)
7d148635	210	goto unlock;
c9a64622	211
6cebcf74 CW	212	engine->execlists.queue_priority_hint = prio;
6cebcf74 CW	213
25d851ad CW	214	/*
25d851ad CW	215	* If we are already the currently executing context, don't
13ed13a4	216	* bother evaluating if we should preempt ourselves.
25d851ad	217	*/
9f3ccd40	218	if (inflight->context == rq->context)
7d148635	219	goto unlock;
c9a64622	220
13ed13a4 CW	221	if (need_preempt(prio, rq_prio(inflight)))
13ed13a4 CW	222	tasklet_hi_schedule(&engine->execlists.tasklet);
7d148635 CW	223
	224	unlock:
	225	rcu_read_unlock();
c9a64622 CW	226	}
c9a64622 CW	227
52c76fb1	228	static void __i915_schedule(struct i915_sched_node *node,
e9eaf82d	229	const struct i915_sched_attr *attr)
e2f3496e	230	{
26fc4e4b	231	const int prio = max(attr->priority, node->attr.priority);
ed7dc677	232	struct intel_engine_cs *engine;
e2f3496e CW	233	struct i915_dependency dep, p;
e2f3496e CW	234	struct i915_dependency stack;
ed7dc677	235	struct sched_cache cache;
e2f3496e CW	236	LIST_HEAD(dfs);
e2f3496e CW	237
e9eaf82d CW	238	/* Needed in order to use the temporary link inside i915_dependency */
e9eaf82d CW	239	lockdep_assert_held(&schedule_lock);
e2f3496e CW	240	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
e2f3496e CW	241
19098018	242	if (node_signaled(node))
e2f3496e CW	243	return;
e2f3496e CW	244
52c76fb1	245	stack.signaler = node;
e2f3496e CW	246	list_add(&stack.dfs_link, &dfs);
	247
	248	/*
	249	* Recursively bump all dependent priorities to match the new request.
	250	*
	251	* A naive approach would be to use recursion:
	252	* static void update_priorities(struct i915_sched_node *node, prio) {
	253	* list_for_each_entry(dep, &node->signalers_list, signal_link)
	254	* update_priorities(dep->signal, prio)
	255	* queue_request(node);
	256	* }
	257	* but that may have unlimited recursion depth and so runs a very
	258	* real risk of overunning the kernel stack. Instead, we build
	259	* a flat list of all dependencies starting with the current request.
	260	* As we walk the list of dependencies, we add all of its dependencies
	261	* to the end of the list (this may include an already visited
	262	* request) and continue to walk onwards onto the new dependencies. The
	263	* end result is a topological list of requests in reverse order, the
	264	* last element in the list is the request we must execute first.
	265	*/
	266	list_for_each_entry(dep, &dfs, dfs_link) {
	267	struct i915_sched_node *node = dep->signaler;
	268
babfb1b5 CW	269	/* If we are already flying, we know we have no signalers */
	270	if (node_started(node))
	271	continue;
	272
e2f3496e CW	273	/*
	274	* Within an engine, there can be no cycle, but we may
	275	* refer to the same dependency chain multiple times
	276	* (redundant dependencies are not eliminated) and across
	277	* engines.
	278	*/
	279	list_for_each_entry(p, &node->signalers_list, signal_link) {
	280	GEM_BUG_ON(p == dep); /* no cycles! */
	281
	282	if (node_signaled(p->signaler))
	283	continue;
	284
e2f3496e CW	285	if (prio > READ_ONCE(p->signaler->attr.priority))
	286	list_move_tail(&p->dfs_link, &dfs);
	287	}
	288	}
	289
	290	/*
	291	* If we didn't need to bump any existing priorities, and we haven't
	292	* yet submitted this request (i.e. there is no potential race with
	293	* execlists_submit_request()), we can set our own priority and skip
	294	* acquiring the engine locks.
	295	*/
52c76fb1 CW	296	if (node->attr.priority == I915_PRIORITY_INVALID) {
	297	GEM_BUG_ON(!list_empty(&node->link));
	298	node->attr = *attr;
e2f3496e CW	299
e2f3496e CW	300	if (stack.dfs_link.next == stack.dfs_link.prev)
e9eaf82d	301	return;
e2f3496e CW	302
	303	__list_del_entry(&stack.dfs_link);
	304	}
	305
ed7dc677	306	memset(&cache, 0, sizeof(cache));
52c76fb1	307	engine = node_to_request(node)->engine;
422d7df4	308	spin_lock(&engine->active.lock);
e2f3496e CW	309
e2f3496e CW	310	/* Fifo and depth-first replacement ensure our deps execute before us */
6d06779e	311	engine = sched_lock_engine(node, engine, &cache);
e2f3496e	312	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
e2f3496e CW	313	INIT_LIST_HEAD(&dep->dfs_link);
e2f3496e CW	314
52c76fb1	315	node = dep->signaler;
ed7dc677	316	engine = sched_lock_engine(node, engine, &cache);
422d7df4	317	lockdep_assert_held(&engine->active.lock);
e2f3496e CW	318
	319	/* Recheck after acquiring the engine->timeline.lock */
	320	if (prio <= node->attr.priority \|\| node_signaled(node))
	321	continue;
	322
6d06779e CW	323	GEM_BUG_ON(node_to_request(node)->engine != engine);
6d06779e CW	324
a4e648a0	325	WRITE_ONCE(node->attr.priority, prio);
422d7df4	326
672c368f CW	327	/*
	328	* Once the request is ready, it will be placed into the
	329	* priority lists and then onto the HW runlist. Before the
	330	* request is ready, it does not contribute to our preemption
	331	* decisions and we can safely ignore it, as it will, and
	332	* any preemption required, be dealt with upon submission.
	333	* See engine->submit_request()
	334	*/
	335	if (list_empty(&node->link))
422d7df4	336	continue;
422d7df4	337
672c368f	338	if (i915_request_in_priority_queue(node_to_request(node))) {
422d7df4 CW	339	if (!cache.priolist)
	340	cache.priolist =
	341	i915_sched_lookup_priolist(engine,
	342	prio);
	343	list_move_tail(&node->link, cache.priolist);
e2f3496e CW	344	}
e2f3496e CW	345
e2f3496e	346	/* Defer (tasklet) submission until after all of our updates. */
13ed13a4	347	kick_submission(engine, node_to_request(node), prio);
e2f3496e CW	348	}
e2f3496e CW	349
422d7df4	350	spin_unlock(&engine->active.lock);
e9eaf82d	351	}
e2f3496e	352
e9eaf82d CW	353	void i915_schedule(struct i915_request rq, const struct i915_sched_attr attr)
e9eaf82d CW	354	{
b7404c7e	355	spin_lock_irq(&schedule_lock);
52c76fb1	356	__i915_schedule(&rq->sched, attr);
b7404c7e	357	spin_unlock_irq(&schedule_lock);
e2f3496e	358	}
e9eaf82d	359
52c76fb1 CW	360	static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
	361	{
	362	struct i915_sched_attr attr = node->attr;
	363
54738e8a CW	364	if (attr.priority & bump)
	365	return;
	366
52c76fb1 CW	367	attr.priority \|= bump;
	368	__i915_schedule(node, &attr);
	369	}
	370
e9eaf82d CW	371	void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
e9eaf82d CW	372	{
b7404c7e	373	unsigned long flags;
e9eaf82d CW	374
e9eaf82d CW	375	GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
a79ca656	376	if (READ_ONCE(rq->sched.attr.priority) & bump)
e9eaf82d CW	377	return;
e9eaf82d CW	378
b7404c7e	379	spin_lock_irqsave(&schedule_lock, flags);
52c76fb1	380	__bump_priority(&rq->sched, bump);
b7404c7e	381	spin_unlock_irqrestore(&schedule_lock, flags);
e9eaf82d	382	}
32eb6bcf	383
5ae87063	384	void i915_sched_node_init(struct i915_sched_node *node)
32eb6bcf	385	{
5ae87063 CW	386	INIT_LIST_HEAD(&node->signalers_list);
	387	INIT_LIST_HEAD(&node->waiters_list);
	388	INIT_LIST_HEAD(&node->link);
67a3acaa CW	389
	390	i915_sched_node_reinit(node);
	391	}
	392
	393	void i915_sched_node_reinit(struct i915_sched_node *node)
	394	{
5ae87063 CW	395	node->attr.priority = I915_PRIORITY_INVALID;
	396	node->semaphores = 0;
	397	node->flags = 0;
67a3acaa CW	398
	399	GEM_BUG_ON(!list_empty(&node->signalers_list));
	400	GEM_BUG_ON(!list_empty(&node->waiters_list));
	401	GEM_BUG_ON(!list_empty(&node->link));
5ae87063 CW	402	}
	403
	404	static struct i915_dependency *
	405	i915_dependency_alloc(void)
	406	{
	407	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
	408	}
	409
	410	static void
	411	i915_dependency_free(struct i915_dependency *dep)
	412	{
	413	kmem_cache_free(global.slab_dependencies, dep);
	414	}
	415
	416	bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
	417	struct i915_sched_node *signal,
	418	struct i915_dependency *dep,
	419	unsigned long flags)
	420	{
	421	bool ret = false;
	422
	423	spin_lock_irq(&schedule_lock);
	424
	425	if (!node_signaled(signal)) {
	426	INIT_LIST_HEAD(&dep->dfs_link);
5ae87063	427	dep->signaler = signal;
8ee36e04	428	dep->waiter = node;
5ae87063 CW	429	dep->flags = flags;
	430
	431	/* Keep track of whether anyone on this chain has a semaphore */
	432	if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
	433	!node_started(signal))
	434	node->flags \|= I915_SCHED_HAS_SEMAPHORE_CHAIN;
	435
f14f27b1	436	/* All set, now publish. Beware the lockless walkers. */
793c2261	437	list_add_rcu(&dep->signal_link, &node->signalers_list);
f14f27b1 CW	438	list_add_rcu(&dep->wait_link, &signal->waiters_list);
f14f27b1 CW	439
6e7eb7a8 CW	440	/*
	441	* As we do not allow WAIT to preempt inflight requests,
	442	* once we have executed a request, along with triggering
	443	* any execution callbacks, we must preserve its ordering
	444	* within the non-preemptible FIFO.
	445	*/
	446	BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
	447	if (flags & I915_DEPENDENCY_EXTERNAL)
	448	__bump_priority(signal, __NO_PREEMPTION);
	449
5ae87063 CW	450	ret = true;
	451	}
	452
	453	spin_unlock_irq(&schedule_lock);
	454
	455	return ret;
	456	}
	457
	458	int i915_sched_node_add_dependency(struct i915_sched_node *node,
a9d094dc CW	459	struct i915_sched_node *signal,
a9d094dc CW	460	unsigned long flags)
5ae87063 CW	461	{
	462	struct i915_dependency *dep;
	463
	464	dep = i915_dependency_alloc();
	465	if (!dep)
	466	return -ENOMEM;
	467
	468	if (!__i915_sched_node_add_dependency(node, signal, dep,
a9d094dc	469	flags \| I915_DEPENDENCY_ALLOC))
5ae87063 CW	470	i915_dependency_free(dep);
	471
	472	return 0;
	473	}
	474
	475	void i915_sched_node_fini(struct i915_sched_node *node)
	476	{
	477	struct i915_dependency dep, tmp;
	478
5ae87063 CW	479	spin_lock_irq(&schedule_lock);
	480
	481	/*
	482	* Everyone we depended upon (the fences we wait to be signaled)
	483	* should retire before us and remove themselves from our list.
	484	* However, retirement is run independently on each timeline and
	485	* so we may be called out-of-order.
	486	*/
	487	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
5ae87063 CW	488	GEM_BUG_ON(!list_empty(&dep->dfs_link));
5ae87063 CW	489
66940061	490	list_del_rcu(&dep->wait_link);
5ae87063 CW	491	if (dep->flags & I915_DEPENDENCY_ALLOC)
	492	i915_dependency_free(dep);
	493	}
67a3acaa	494	INIT_LIST_HEAD(&node->signalers_list);
5ae87063 CW	495
	496	/* Remove ourselves from everyone who depends upon us */
	497	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
	498	GEM_BUG_ON(dep->signaler != node);
	499	GEM_BUG_ON(!list_empty(&dep->dfs_link));
	500
66940061	501	list_del_rcu(&dep->signal_link);
5ae87063 CW	502	if (dep->flags & I915_DEPENDENCY_ALLOC)
	503	i915_dependency_free(dep);
	504	}
67a3acaa	505	INIT_LIST_HEAD(&node->waiters_list);
5ae87063 CW	506
5ae87063 CW	507	spin_unlock_irq(&schedule_lock);
32eb6bcf CW	508	}
32eb6bcf CW	509
103b76ee CW	510	static void i915_global_scheduler_shrink(void)
	511	{
	512	kmem_cache_shrink(global.slab_dependencies);
	513	kmem_cache_shrink(global.slab_priorities);
	514	}
	515
	516	static void i915_global_scheduler_exit(void)
	517	{
	518	kmem_cache_destroy(global.slab_dependencies);
	519	kmem_cache_destroy(global.slab_priorities);
	520	}
	521
	522	static struct i915_global_scheduler global = { {
	523	.shrink = i915_global_scheduler_shrink,
	524	.exit = i915_global_scheduler_exit,
	525	} };
	526
32eb6bcf CW	527	int __init i915_global_scheduler_init(void)
	528	{
	529	global.slab_dependencies = KMEM_CACHE(i915_dependency,
66940061 CW	530	SLAB_HWCACHE_ALIGN \|
66940061 CW	531	SLAB_TYPESAFE_BY_RCU);
32eb6bcf CW	532	if (!global.slab_dependencies)
	533	return -ENOMEM;
	534
	535	global.slab_priorities = KMEM_CACHE(i915_priolist,
	536	SLAB_HWCACHE_ALIGN);
	537	if (!global.slab_priorities)
	538	goto err_priorities;
	539
103b76ee	540	i915_global_register(&global.base);
32eb6bcf CW	541	return 0;
	542
	543	err_priorities:
	544	kmem_cache_destroy(global.slab_priorities);
	545	return -ENOMEM;
	546	}