#include "../workqueue_internal.h"
#include "../../io_uring/io-wq.h"
#include "../smpboot.h"
+#include "../locking/mutex.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
struct set_affinity_pending my_pending = { }, *pending = NULL;
bool stop_pending, complete = false;
- /* Can the task run on the task's current CPU? If so, we're done */
- if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
+ /*
+ * Can the task run on the task's current CPU? If so, we're done
+ *
+ * We are also done if the task is the current donor boosting a
+ * lock-holding proxy (and so may have been migrated outside its
+ * current or previous affinity mask).
+ */
+ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask) ||
+ (task_current_donor(rq, p) && !task_current(rq, p))) {
struct task_struct *push_task = NULL;
if ((flags & SCA_MIGRATE_ENABLE) &&
}
#ifdef CONFIG_SCHED_PROXY_EXEC
-static inline void proxy_resched_idle(struct rq *rq)
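+/*
+ * Switch @rq over to running idle: put the current donor, make rq->idle
+ * the next task and donor, and flag it for resched. Returns rq->idle so
+ * callers can hand it back as this cycle's pick.
+ */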
+static inline struct task_struct *proxy_resched_idle(struct rq *rq)
{
put_prev_set_next_task(rq, rq->donor, rq->idle);
rq_set_donor(rq, rq->idle);
set_tsk_need_resched(rq->idle);
+ return rq->idle;
}
static bool __proxy_deactivate(struct rq *rq, struct task_struct *donor)
}
/*
- * Initial simple sketch that just deactivates the blocked task
- * chosen by pick_next_task() so we can then pick something that
- * isn't blocked.
+ * Find runnable lock owner to proxy for mutex blocked donor
+ *
+ * Follow the blocked-on relation:
+ *   task->blocked_on -> mutex->owner -> task...
+ *
+ * Lock order:
+ *
+ *   p->pi_lock
+ *     rq->lock
+ *       mutex->wait_lock
+ *
+ * Returns the task that is going to be used as execution context (the one
+ * that is actually going to be run on cpu_of(rq)).
*/
static struct task_struct *
find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
{
+ struct task_struct *owner = NULL;
+ int this_cpu = cpu_of(rq);
+ struct task_struct *p;
struct mutex *mutex;
- mutex = donor->blocked_on;
- /* Something changed in the chain, so pick again */
- if (!mutex)
- return NULL;
- /*
- * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
- * and ensure @owner sticks around.
- */
- guard(raw_spinlock)(&mutex->wait_lock);
+ /* Follow blocked_on chain. */
+ for (p = donor; task_is_blocked(p); p = owner) {
+ mutex = p->blocked_on;
+ /* Something changed in the chain, so pick again */
+ if (!mutex)
+ return NULL;
+ /*
+ * By taking mutex->wait_lock we hold off concurrent mutex_unlock()
+ * and ensure @owner sticks around.
+ */
+ guard(raw_spinlock)(&mutex->wait_lock);
- /* Check again that donor is blocked with blocked_lock held */
- if (!task_is_blocked(donor) || mutex != __get_task_blocked_on(donor)) {
+ /* Check again that p is blocked with wait_lock held */
+ if (mutex != __get_task_blocked_on(p)) {
+ /*
+ * Something changed in the blocked_on chain and
+ * we don't know if only at this level. So, let's
+ * just bail out completely and let __schedule()
+ * figure things out (pick_again loop).
+ */
+ return NULL;
+ }
+
+ owner = __mutex_owner(mutex);
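+ /*
+ * If the mutex has no owner it was just released; clear the
+ * blocked_on relation and pick p so it can retry acquiring the lock.
+ */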
+ if (!owner) {
+ __clear_task_blocked_on(p, mutex);
+ return p;
+ }
+
+ if (!READ_ONCE(owner->on_rq) || owner->se.sched_delayed) {
+ /* XXX Don't handle blocked owners/delayed dequeue yet */
+ return proxy_deactivate(rq, donor);
+ }
+
+ if (task_cpu(owner) != this_cpu) {
+ /* XXX Don't handle migrations yet */
+ return proxy_deactivate(rq, donor);
+ }
+
+ if (task_on_rq_migrating(owner)) {
+ /*
+ * One of the mutex owners in the blocked_on chain is currently
+ * migrating to this CPU, but has not yet been enqueued because
+ * we are holding the rq lock. As a simple solution, just schedule
+ * rq->idle to give the migration a chance to complete. Much like
+ * the migrate_task case, we should end up back in find_proxy_task(),
+ * hopefully this time with all relevant tasks already enqueued.
+ */
+ return proxy_resched_idle(rq);
+ }
+
+ /*
+ * It's possible to race such that, after we check owner->on_rq
+ * but before we check (task_cpu(owner) != this_cpu), the task
+ * on another cpu is migrated back to this cpu. In that case it
+ * could slip by our checks. So double check that the owner is
+ * still queued on this cpu and not migrating. If we get
+ * inconsistent results, try again.
+ */
+ if (!task_on_rq_queued(owner) || task_cpu(owner) != this_cpu)
+ return NULL;
+
+ if (owner == p) {
+ /*
+ * It's possible we interleave with mutex_unlock like:
+ *
+ *                              lock(&rq->lock);
+ *                                find_proxy_task()
+ * mutex_unlock()
+ *   lock(&wait_lock);
+ *   donor(owner) = current->blocked_donor;
+ *   unlock(&wait_lock);
+ *
+ *   wake_up_q();
+ *     ...
+ *       ttwu_runnable()
+ *         __task_rq_lock()
+ *                              lock(&wait_lock);
+ *                              owner == p
+ *
+ * Which leaves us to finish the ttwu_runnable() and make it go.
+ *
+ * So schedule rq->idle so that ttwu_runnable() can get the rq
+ * lock and mark owner as running.
+ */
+ return proxy_resched_idle(rq);
+ }
/*
- * Something changed in the blocked_on chain and
- * we don't know if only at this level. So, let's
- * just bail out completely and let __schedule()
- * figure things out (pick_again loop).
+ * OK, now we're absolutely sure @owner is on this
+ * rq, therefore holding @rq->lock is sufficient to
+ * guarantee its existence, as per ttwu_remote().
*/
- return NULL; /* do pick_next_task() again */
}
- return proxy_deactivate(rq, donor);
+
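+ /*
+ * When the loop ends, @owner (if set) is not blocked and is queued
+ * on this rq, so return it as the execution context to run while
+ * @donor stays blocked.
+ */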
+ WARN_ON_ONCE(owner && !owner->on_rq);
+ return owner;
}
#else /* SCHED_PROXY_EXEC */
static struct task_struct *
next = find_proxy_task(rq, next, &rf);
if (!next)
goto pick_again;
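+ /*
+ * find_proxy_task() handed back rq->idle as a stand-in pick; keep
+ * the resched state set so we come back through __schedule() and
+ * pick again as soon as possible.
+ */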
+ if (next == rq->idle)
+ goto keep_resched;
}
picked:
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
+keep_resched:
rq->last_seen_need_resched_ns = 0;
is_switch = prev != next;
* 2) throttled_lb_pair, or
* 3) cannot be migrated to this CPU due to cpus_ptr, or
* 4) running (obviously), or
- * 5) are cache-hot on their current CPU.
+ * 5) are cache-hot on their current CPU, or
+ * 6) are blocked on mutexes (if SCHED_PROXY_EXEC is enabled).
*/
if ((p->se.sched_delayed) && (env->migration_type != migrate_load))
return 0;
if (kthread_is_per_cpu(p))
return 0;
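+ /*
+ * With proxy execution, mutex-blocked tasks remain on the runqueue
+ * as potential donors; don't let the load balancer migrate them.
+ */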
+ if (task_is_blocked(p))
+ return 0;
+
if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
int cpu;
/* Record that we found at least one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;
- if (task_on_cpu(env->src_rq, p)) {
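+ /*
+ * Also skip the current donor: its scheduler context is in use
+ * by the proxy running on its behalf.
+ */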
+ if (task_on_cpu(env->src_rq, p) ||
+ task_current_donor(env->src_rq, p)) {
schedstat_inc(p->stats.nr_failed_migrations_running);
return 0;
}
schedstat_inc(p->stats.nr_forced_migrations);
}
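+ /* Never detach the running task or the current donor. */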
+ WARN_ON(task_current(env->src_rq, p));
+ WARN_ON(task_current_donor(env->src_rq, p));
+
deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
set_task_cpu(p, env->dst_cpu);
}