sched/core: Fix TASK_DEAD race in finish_task_switch()

author Peter Zijlstra <peterz@infradead.org>

Tue, 29 Sep 2015 12:45:09 +0000 (14:45 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 22 Oct 2015 21:49:19 +0000 (14:49 -0700)
author Peter Zijlstra <peterz@infradead.org>
Tue, 29 Sep 2015 12:45:09 +0000 (14:45 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 22 Oct 2015 21:49:19 +0000 (14:49 -0700)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index e2f9d020e4f68c124b4b8731679604e506f92867..20771a55029d3a1157cb18887c123d0a44abf46d 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2461,11 +2461,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
          * If a task dies, then it sets TASK_DEAD in tsk->state and calls
          * schedule one last time. The schedule call will never return, and
          * the scheduled task must drop that reference.
-        * The test for TASK_DEAD must occur while the runqueue locks are
-        * still held, otherwise prev could be scheduled on another cpu, die
-        * there before we look at prev->state, and then the reference would
-        * be dropped twice.
-        *              Manfred Spraul <manfred@colorfullife.com>
+        *
+        * We must observe prev->state before clearing prev->on_cpu (in
+        * finish_lock_switch), otherwise a concurrent wakeup can get prev
+        * running on another CPU and we could race with its RUNNING -> DEAD
+        * transition, resulting in a double drop.
          */
         prev_state = prev->state;
         vtime_task_switch(prev);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 84d48790bb6d0607e5dd67dd449f0353c51b0136..08ab96b366bfa1253e44119b716424299038ee42 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1091,9 +1091,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
          * After ->on_cpu is cleared, the task can be moved to a different CPU.
          * We must ensure this doesn't happen until the switch is completely
          * finished.
+        *
+        * Pairs with the control dependency and rmb in try_to_wake_up().
          */
-       smp_wmb();
-       prev->on_cpu = 0;
+       smp_store_release(&prev->on_cpu, 0);
  #endif
  #ifdef CONFIG_DEBUG_SPINLOCK
         /* this is a valid case when another task releases the spinlock */
author	Peter Zijlstra <peterz@infradead.org>
	Tue, 29 Sep 2015 12:45:09 +0000 (14:45 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 22 Oct 2015 21:49:19 +0000 (14:49 -0700)
kernel/sched/core.c		patch \| blob \| blame \| history
kernel/sched/sched.h		patch \| blob \| blame \| history