git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
2.6.32 patches
author Greg Kroah-Hartman <gregkh@suse.de>
Thu, 16 Sep 2010 21:06:53 +0000 (14:06 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Thu, 16 Sep 2010 21:06:53 +0000 (14:06 -0700)
24 files changed:
queue-2.6.32/sched-add-pre-and-post-wakeup-hooks.patch [new file with mode: 0644]
queue-2.6.32/sched-clean-up-ttwu-rq-locking.patch [new file with mode: 0644]
queue-2.6.32/sched-consolidate-select_task_rq-callers.patch [new file with mode: 0644]
queue-2.6.32/sched-ensure-set_task_cpu-is-never-called-on-blocked-tasks.patch [new file with mode: 0644]
queue-2.6.32/sched-fix-broken-assertion.patch [new file with mode: 0644]
queue-2.6.32/sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch [new file with mode: 0644]
queue-2.6.32/sched-fix-hotplug-hang.patch [new file with mode: 0644]
queue-2.6.32/sched-fix-sched_exec-balancing.patch [new file with mode: 0644]
queue-2.6.32/sched-fix-select_task_rq-vs-hotplug-issues.patch [new file with mode: 0644]
queue-2.6.32/sched-fix-set_cpu_active-in-cpu_down.patch [new file with mode: 0644]
queue-2.6.32/sched-make-wakeup-side-and-atomic-variants-of-completion-api-irq-safe.patch [new file with mode: 0644]
queue-2.6.32/sched-make-warning-less-noisy.patch [new file with mode: 0644]
queue-2.6.32/sched-protect-sched_rr_get_param-access-to-task-sched_class.patch [new file with mode: 0644]
queue-2.6.32/sched-protect-task-cpus_allowed-access-in-sched_getaffinity.patch [new file with mode: 0644]
queue-2.6.32/sched-remove-forced2_migrations-stats.patch [new file with mode: 0644]
queue-2.6.32/sched-remove-rq-clock-coupling-from-set_task_cpu.patch [new file with mode: 0644]
queue-2.6.32/sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch [new file with mode: 0644]
queue-2.6.32/sched-remove-unused-cpu_nr_migrations.patch [new file with mode: 0644]
queue-2.6.32/sched-sanitize-fork-handling.patch [new file with mode: 0644]
queue-2.6.32/sched-use-rcu-in-sched_get-set_affinity.patch [new file with mode: 0644]
queue-2.6.32/sched-use-rcu-in-sched_get_rr_param.patch [new file with mode: 0644]
queue-2.6.32/sched-use-rcu-in-sys_sched_getscheduler-sys_sched_getparam.patch [new file with mode: 0644]
queue-2.6.32/sched-use-task_waking-for-fork-wakups.patch [new file with mode: 0644]
queue-2.6.32/series

diff --git a/queue-2.6.32/sched-add-pre-and-post-wakeup-hooks.patch b/queue-2.6.32/sched-add-pre-and-post-wakeup-hooks.patch
new file mode 100644 (file)
index 0000000..f5f334f
--- /dev/null
@@ -0,0 +1,95 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:05:38 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Wed, 16 Dec 2009 18:04:40 +0100
+Subject: sched: Add pre and post wakeup hooks
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <ef872be6401162a2dde3aa635a318b120bd6ee89.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit efbbd05a595343a413964ad85a2ad359b7b7efbd upstream
+
+As will be apparent in the next patch, we need a pre wakeup hook
+for sched_fair task migration, hence rename the post wakeup hook
+and add a pre wakeup hook.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091216170518.114746117@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h |    3 ++-
+ kernel/sched.c        |   12 ++++++++----
+ kernel/sched_rt.c     |    4 ++--
+ 3 files changed, 12 insertions(+), 7 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1093,7 +1093,8 @@ struct sched_class {
+                             enum cpu_idle_type idle);
+       void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+       void (*post_schedule) (struct rq *this_rq);
+-      void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
++      void (*task_waking) (struct rq *this_rq, struct task_struct *task);
++      void (*task_woken) (struct rq *this_rq, struct task_struct *task);
+       void (*set_cpus_allowed)(struct task_struct *p,
+                                const struct cpumask *newmask);
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2438,6 +2438,10 @@ static int try_to_wake_up(struct task_st
+       if (task_contributes_to_load(p))
+               rq->nr_uninterruptible--;
+       p->state = TASK_WAKING;
++
++      if (p->sched_class->task_waking)
++              p->sched_class->task_waking(rq, p);
++
+       __task_rq_unlock(rq);
+       cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+@@ -2501,8 +2505,8 @@ out_running:
+       p->state = TASK_RUNNING;
+ #ifdef CONFIG_SMP
+-      if (p->sched_class->task_wake_up)
+-              p->sched_class->task_wake_up(rq, p);
++      if (p->sched_class->task_woken)
++              p->sched_class->task_woken(rq, p);
+       if (unlikely(rq->idle_stamp)) {
+               u64 delta = rq->clock - rq->idle_stamp;
+@@ -2693,8 +2697,8 @@ void wake_up_new_task(struct task_struct
+       trace_sched_wakeup_new(rq, p, 1);
+       check_preempt_curr(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+-      if (p->sched_class->task_wake_up)
+-              p->sched_class->task_wake_up(rq, p);
++      if (p->sched_class->task_woken)
++              p->sched_class->task_woken(rq, p);
+ #endif
+       task_rq_unlock(rq, &flags);
+ }
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -1485,7 +1485,7 @@ static void post_schedule_rt(struct rq *
+  * If we are not running and we are not going to reschedule soon, we should
+  * try to push tasks away now
+  */
+-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
++static void task_woken_rt(struct rq *rq, struct task_struct *p)
+ {
+       if (!task_running(rq, p) &&
+           !test_tsk_need_resched(rq->curr) &&
+@@ -1766,7 +1766,7 @@ static const struct sched_class rt_sched
+       .rq_offline             = rq_offline_rt,
+       .pre_schedule           = pre_schedule_rt,
+       .post_schedule          = post_schedule_rt,
+-      .task_wake_up           = task_wake_up_rt,
++      .task_woken             = task_woken_rt,
+       .switched_from          = switched_from_rt,
+ #endif
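
For orientation, a minimal sketch of how a scheduling class hooks into the renamed callbacks after this patch (illustrative only; the demo_ names are hypothetical and not part of the patch, and the mandatory sched_class methods are elided):

    #include <linux/sched.h>

    /* task_waking: called from try_to_wake_up() while p->state == TASK_WAKING,
     * still under the old runqueue's lock, before a target CPU is chosen. */
    static void demo_task_waking(struct rq *rq, struct task_struct *p)
    {
    }

    /* task_woken: called once the task has been activated on its (possibly
     * new) runqueue, e.g. to push it elsewhere as sched_rt does. */
    static void demo_task_woken(struct rq *rq, struct task_struct *p)
    {
    }

    static const struct sched_class demo_sched_class = {
            /* ... other methods elided ... */
            .task_waking    = demo_task_waking,
            .task_woken     = demo_task_woken,
    };
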
diff --git a/queue-2.6.32/sched-clean-up-ttwu-rq-locking.patch b/queue-2.6.32/sched-clean-up-ttwu-rq-locking.patch
new file mode 100644 (file)
index 0000000..0c5a75d
--- /dev/null
@@ -0,0 +1,48 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:02:30 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 27 Nov 2009 15:44:43 +0100
+Subject: sched: Clean up ttwu() rq locking
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <2c1be9e75310adeb8e4f3f0c1bf356cd3d893ab0.1283514306.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit ab19cb23313733c55e0517607844b86720b35f5f upstream
+
+Since set_task_clock() doesn't rely on rq->clock anymore we can simplify
+the mess in ttwu().
+
+Optimize things a bit by not fiddling with the IRQ state there.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2379,16 +2379,14 @@ static int try_to_wake_up(struct task_st
+       if (task_contributes_to_load(p))
+               rq->nr_uninterruptible--;
+       p->state = TASK_WAKING;
+-      task_rq_unlock(rq, &flags);
++      __task_rq_unlock(rq);
+       cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+       if (cpu != orig_cpu)
+               set_task_cpu(p, cpu);
+-      rq = task_rq_lock(p, &flags);
+-
+-      if (rq != orig_rq)
+-              update_rq_clock(rq);
++      rq = __task_rq_lock(p);
++      update_rq_clock(rq);
+       WARN_ON(p->state != TASK_WAKING);
+       cpu = task_cpu(p);
diff --git a/queue-2.6.32/sched-consolidate-select_task_rq-callers.patch b/queue-2.6.32/sched-consolidate-select_task_rq-callers.patch
new file mode 100644 (file)
index 0000000..f83ffde
--- /dev/null
@@ -0,0 +1,70 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:01:49 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Wed, 25 Nov 2009 13:31:39 +0100
+Subject: sched: Consolidate select_task_rq() callers
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <24492a0819920ab40aab02b943524de7e090d3d3.1283514306.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 970b13bacba14a8cef6f642861947df1d175b0b3 upstream
+
+sched: Consolidate select_task_rq() callers
+
+Small cleanup.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <new-submission>
+[ v2: build fix ]
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2333,6 +2333,14 @@ void task_oncpu_function_call(struct tas
+       preempt_enable();
+ }
++#ifdef CONFIG_SMP
++static inline
++int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
++{
++      return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
++}
++#endif
++
+ /***
+  * try_to_wake_up - wake up a thread
+  * @p: the to-be-woken-up thread
+@@ -2386,7 +2394,7 @@ static int try_to_wake_up(struct task_st
+       p->state = TASK_WAKING;
+       task_rq_unlock(rq, &flags);
+-      cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
++      cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+       if (cpu != orig_cpu)
+               set_task_cpu(p, cpu);
+@@ -2601,7 +2609,7 @@ void sched_fork(struct task_struct *p, i
+               p->sched_class = &fair_sched_class;
+ #ifdef CONFIG_SMP
+-      cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
++      cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+ #endif
+       set_task_cpu(p, cpu);
+@@ -3170,7 +3178,7 @@ out:
+ void sched_exec(void)
+ {
+       int new_cpu, this_cpu = get_cpu();
+-      new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
++      new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
+       put_cpu();
+       if (new_cpu != this_cpu)
+               sched_migrate_task(current, new_cpu);
diff --git a/queue-2.6.32/sched-ensure-set_task_cpu-is-never-called-on-blocked-tasks.patch b/queue-2.6.32/sched-ensure-set_task_cpu-is-never-called-on-blocked-tasks.patch
new file mode 100644 (file)
index 0000000..2d29882
--- /dev/null
@@ -0,0 +1,191 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:04:28 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Wed, 16 Dec 2009 18:04:36 +0100
+Subject: sched: Ensure set_task_cpu() is never called on blocked tasks
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <7cf1b7473e78ce3c210ddde81f1e72e778aecffb.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit e2912009fb7b715728311b0d8fe327a1432b3f79 upstream
+
+In order to clean up the set_task_cpu() rq dependencies we need
+to ensure it is never called on blocked tasks because such usage
+does not pair with consistent rq->lock usage.
+
+This puts the migration burden on ttwu().
+
+Furthermore we need to close a race against changing
+->cpus_allowed, since select_task_rq() runs with only preemption
+disabled.
+
+For sched_fork() this is safe because the child isn't in the
+tasklist yet, for wakeup we fix this by synchronizing
+set_cpus_allowed_ptr() against TASK_WAKING, which leaves
+sched_exec to be a problem
+
+This also closes a hole in (6ad4c1888 sched: Fix balance vs
+hotplug race) where ->select_task_rq() doesn't validate the
+result against the sched_domain/root_domain.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091216170517.807938893@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   83 +++++++++++++++++++++++++++++++++++++++++++++------------
+ 1 file changed, 66 insertions(+), 17 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2017,21 +2017,15 @@ static inline void check_class_changed(s
+  */
+ void kthread_bind(struct task_struct *p, unsigned int cpu)
+ {
+-      struct rq *rq = cpu_rq(cpu);
+-      unsigned long flags;
+-
+       /* Must have done schedule() in kthread() before we set_task_cpu */
+       if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+               WARN_ON(1);
+               return;
+       }
+-      spin_lock_irqsave(&rq->lock, flags);
+-      set_task_cpu(p, cpu);
+       p->cpus_allowed = cpumask_of_cpu(cpu);
+       p->rt.nr_cpus_allowed = 1;
+       p->flags |= PF_THREAD_BOUND;
+-      spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ EXPORT_SYMBOL(kthread_bind);
+@@ -2072,6 +2066,14 @@ void set_task_cpu(struct task_struct *p,
+       struct cfs_rq *old_cfsrq = task_cfs_rq(p),
+                     *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
++#ifdef CONFIG_SCHED_DEBUG
++      /*
++       * We should never call set_task_cpu() on a blocked task,
++       * ttwu() will sort out the placement.
++       */
++      WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING);
++#endif
++
+       trace_sched_migrate_task(p, new_cpu);
+       if (old_cpu != new_cpu) {
+@@ -2105,12 +2107,10 @@ migrate_task(struct task_struct *p, int
+       /*
+        * If the task is not on a runqueue (and not running), then
+-       * it is sufficient to simply update the task's cpu field.
++       * the next wake-up will properly place the task.
+        */
+-      if (!p->se.on_rq && !task_running(rq, p)) {
+-              set_task_cpu(p, dest_cpu);
++      if (!p->se.on_rq && !task_running(rq, p))
+               return 0;
+-      }
+       init_completion(&req->done);
+       req->task = p;
+@@ -2316,10 +2316,42 @@ void task_oncpu_function_call(struct tas
+ }
+ #ifdef CONFIG_SMP
++/*
++ * Called from:
++ *
++ *  - fork, @p is stable because it isn't on the tasklist yet
++ *
++ *  - exec, @p is unstable XXX
++ *
++ *  - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
++ *             we should be good.
++ */
+ static inline
+ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+ {
+-      return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
++      int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
++
++      /*
++       * In order not to call set_task_cpu() on a blocking task we need
++       * to rely on ttwu() to place the task on a valid ->cpus_allowed
++       * cpu.
++       *
++       * Since this is common to all placement strategies, this lives here.
++       *
++       * [ this allows ->select_task() to simply return task_cpu(p) and
++       *   not worry about this generic constraint ]
++       */
++      if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
++                   !cpu_active(cpu))) {
++
++              cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
++              /*
++               * XXX: race against hot-plug modifying cpu_active_mask
++               */
++              BUG_ON(cpu >= nr_cpu_ids);
++      }
++
++      return cpu;
+ }
+ #endif
+@@ -7128,7 +7160,23 @@ int set_cpus_allowed_ptr(struct task_str
+       struct rq *rq;
+       int ret = 0;
++      /*
++       * Since we rely on wake-ups to migrate sleeping tasks, don't change
++       * the ->cpus_allowed mask from under waking tasks, which would be
++       * possible when we change rq->lock in ttwu(), so synchronize against
++       * TASK_WAKING to avoid that.
++       */
++again:
++      while (p->state == TASK_WAKING)
++              cpu_relax();
++
+       rq = task_rq_lock(p, &flags);
++
++      if (p->state == TASK_WAKING) {
++              task_rq_unlock(rq, &flags);
++              goto again;
++      }
++
+       if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+               ret = -EINVAL;
+               goto out;
+@@ -7184,7 +7232,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ {
+       struct rq *rq_dest, *rq_src;
+-      int ret = 0, on_rq;
++      int ret = 0;
+       if (unlikely(!cpu_active(dest_cpu)))
+               return ret;
+@@ -7200,12 +7248,13 @@ static int __migrate_task(struct task_st
+       if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+               goto fail;
+-      on_rq = p->se.on_rq;
+-      if (on_rq)
++      /*
++       * If we're not on a rq, the next wake-up will ensure we're
++       * placed properly.
++       */
++      if (p->se.on_rq) {
+               deactivate_task(rq_src, p, 0);
+-
+-      set_task_cpu(p, dest_cpu);
+-      if (on_rq) {
++              set_task_cpu(p, dest_cpu);
+               activate_task(rq_dest, p, 0);
+               check_preempt_curr(rq_dest, p, 0);
+       }
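
A caller-side illustration of the kthread_bind() requirement touched above: the thread must already have scheduled inside kthread() before being bound. This is a sketch only; the demo_ function and thread name are hypothetical and not taken from the patch.

    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/err.h>

    static int demo_thread_fn(void *data)
    {
            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule();
            }
            return 0;
    }

    static struct task_struct *demo_start_on(int cpu)
    {
            struct task_struct *t;

            /* kthread_create() leaves the new thread sleeping in kthread(),
             * so the wait_task_inactive() check in kthread_bind() succeeds. */
            t = kthread_create(demo_thread_fn, NULL, "demo/%d", cpu);
            if (IS_ERR(t))
                    return t;

            kthread_bind(t, cpu);   /* now only updates cpus_allowed, no rq->lock */
            wake_up_process(t);
            return t;
    }
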
diff --git a/queue-2.6.32/sched-fix-broken-assertion.patch b/queue-2.6.32/sched-fix-broken-assertion.patch
new file mode 100644 (file)
index 0000000..d00e6e2
--- /dev/null
@@ -0,0 +1,39 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:05:00 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu, 17 Dec 2009 13:16:31 +0100
+Subject: sched: Fix broken assertion
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <442ca4a574ee45640c3d99809b6171fb9d3f6646.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 077614ee1e93245a3b9a4e1213659405dbeb0ba6 upstream
+
+There's a preemption race in the set_task_cpu() debug check in
+that when we get preempted after setting task->state we'd still
+be on the rq proper, but fail the test.
+
+Check for preempted tasks, since those are always on the RQ.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <20091217121830.137155561@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2071,7 +2071,8 @@ void set_task_cpu(struct task_struct *p,
+        * We should never call set_task_cpu() on a blocked task,
+        * ttwu() will sort out the placement.
+        */
+-      WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING);
++      WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
++                      !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+ #endif
+       trace_sched_migrate_task(p, new_cpu);
diff --git a/queue-2.6.32/sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch b/queue-2.6.32/sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch
new file mode 100644 (file)
index 0000000..8e5cb4c
--- /dev/null
@@ -0,0 +1,184 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:06:15 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu, 21 Jan 2010 21:04:57 +0100
+Subject: sched: Fix fork vs hotplug vs cpuset namespaces
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <d69112210cf6fa5909dcd5d12105979f694c228c.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit fabf318e5e4bda0aca2b0d617b191884fda62703 upstream
+
+There are a number of issues:
+
+1) TASK_WAKING vs cgroup_clone (cpusets)
+
+copy_process():
+
+  sched_fork()
+    child->state = TASK_WAKING; /* waiting for wake_up_new_task() */
+  if (current->nsproxy != p->nsproxy)
+     ns_cgroup_clone()
+       cgroup_clone()
+         mutex_lock(inode->i_mutex)
+         mutex_lock(cgroup_mutex)
+         cgroup_attach_task()
+          ss->can_attach()
+           ss->attach() [ -> cpuset_attach() ]
+             cpuset_attach_task()
+               set_cpus_allowed_ptr();
+                 while (child->state == TASK_WAKING)
+                   cpu_relax();
+will deadlock the system.
+
+2) cgroup_clone (cpusets) vs copy_process
+
+So even if the above would work we still have:
+
+copy_process():
+
+  if (current->nsproxy != p->nsproxy)
+     ns_cgroup_clone()
+       cgroup_clone()
+         mutex_lock(inode->i_mutex)
+         mutex_lock(cgroup_mutex)
+         cgroup_attach_task()
+          ss->can_attach()
+           ss->attach() [ -> cpuset_attach() ]
+             cpuset_attach_task()
+               set_cpus_allowed_ptr();
+  ...
+
+  p->cpus_allowed = current->cpus_allowed
+
+over-writing the modified cpus_allowed.
+
+3) fork() vs hotplug
+
+  if we unplug the child's cpu after the sanity check when the child
+  gets attached to the task_list but before wake_up_new_task() shit
+  will meet with fan.
+
+Solve all these issues by moving fork cpu selection into
+wake_up_new_task().
+
+Reported-by: Serge E. Hallyn <serue@us.ibm.com>
+Tested-by: Serge E. Hallyn <serue@us.ibm.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <1264106190.4283.1314.camel@laptop>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/fork.c  |   15 ---------------
+ kernel/sched.c |   39 +++++++++++++++++++++++++++------------
+ 2 files changed, 27 insertions(+), 27 deletions(-)
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1233,21 +1233,6 @@ static struct task_struct *copy_process(
+       /* Need tasklist lock for parent etc handling! */
+       write_lock_irq(&tasklist_lock);
+-      /*
+-       * The task hasn't been attached yet, so its cpus_allowed mask will
+-       * not be changed, nor will its assigned CPU.
+-       *
+-       * The cpus_allowed mask of the parent may have changed after it was
+-       * copied first time - so re-copy it here, then check the child's CPU
+-       * to ensure it is on a valid CPU (and if not, just force it back to
+-       * parent's CPU). This avoids alot of nasty races.
+-       */
+-      p->cpus_allowed = current->cpus_allowed;
+-      p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
+-      if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
+-                      !cpu_online(task_cpu(p))))
+-              set_task_cpu(p, smp_processor_id());
+-
+       /* CLONE_PARENT re-uses the old parent */
+       if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
+               p->real_parent = current->real_parent;
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2351,14 +2351,12 @@ static int select_fallback_rq(int cpu, s
+ }
+ /*
+- * Called from:
++ * Gets called from 3 sites (exec, fork, wakeup), since it is called without
++ * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
++ * by:
+  *
+- *  - fork, @p is stable because it isn't on the tasklist yet
+- *
+- *  - exec, @p is unstable, retry loop
+- *
+- *  - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
+- *             we should be good.
++ *  exec:           is unstable, retry loop
++ *  fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
+  */
+ static inline
+ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+@@ -2652,9 +2650,6 @@ void sched_fork(struct task_struct *p, i
+       if (p->sched_class->task_fork)
+               p->sched_class->task_fork(p);
+-#ifdef CONFIG_SMP
+-      cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+-#endif
+       set_task_cpu(p, cpu);
+ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+@@ -2684,6 +2679,21 @@ void wake_up_new_task(struct task_struct
+ {
+       unsigned long flags;
+       struct rq *rq;
++      int cpu = get_cpu();
++
++#ifdef CONFIG_SMP
++      /*
++       * Fork balancing, do it here and not earlier because:
++       *  - cpus_allowed can change in the fork path
++       *  - any previously selected cpu might disappear through hotplug
++       *
++       * We still have TASK_WAKING but PF_STARTING is gone now, meaning
++       * ->cpus_allowed is stable, we have preemption disabled, meaning
++       * cpu_online_mask is stable.
++       */
++      cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
++      set_task_cpu(p, cpu);
++#endif
+       rq = task_rq_lock(p, &flags);
+       BUG_ON(p->state != TASK_WAKING);
+@@ -2697,6 +2707,7 @@ void wake_up_new_task(struct task_struct
+               p->sched_class->task_woken(rq, p);
+ #endif
+       task_rq_unlock(rq, &flags);
++      put_cpu();
+ }
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+@@ -7198,14 +7209,18 @@ int set_cpus_allowed_ptr(struct task_str
+        * the ->cpus_allowed mask from under waking tasks, which would be
+        * possible when we change rq->lock in ttwu(), so synchronize against
+        * TASK_WAKING to avoid that.
++       *
++       * Make an exception for freshly cloned tasks, since cpuset namespaces
++       * might move the task about, we have to validate the target in
++       * wake_up_new_task() anyway since the cpu might have gone away.
+        */
+ again:
+-      while (p->state == TASK_WAKING)
++      while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
+               cpu_relax();
+       rq = task_rq_lock(p, &flags);
+-      if (p->state == TASK_WAKING) {
++      if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
+               task_rq_unlock(rq, &flags);
+               goto again;
+       }
diff --git a/queue-2.6.32/sched-fix-hotplug-hang.patch b/queue-2.6.32/sched-fix-hotplug-hang.patch
new file mode 100644 (file)
index 0000000..6bfa50e
--- /dev/null
@@ -0,0 +1,41 @@
+From peterz@infradead.org  Thu Sep 16 14:06:03 2010
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Sun, 20 Dec 2009 17:36:27 +0100
+Subject: sched: Fix hotplug hang
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <e5888842aa3ec8d8248bd1f7ea7f2edfbc61a677.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 70f1120527797adb31c68bdc6f1b45e182c342c7 upstream
+
+The hot-unplug kstopmachine usage does a wakeup after
+deactivating the cpu, hence we cannot use cpu_active()
+here but must rely on the good olde online.
+
+Reported-by: Sachin Sant <sachinp@in.ibm.com>
+Reported-by: Jens Axboe <jens.axboe@oracle.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Tested-by: Jens Axboe <jens.axboe@oracle.com>
+Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
+Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+LKML-Reference: <1261326987.4314.24.camel@laptop>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2376,7 +2376,7 @@ int select_task_rq(struct task_struct *p
+        *   not worry about this generic constraint ]
+        */
+       if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+-                   !cpu_active(cpu)))
++                   !cpu_online(cpu)))
+               cpu = select_fallback_rq(task_cpu(p), p);
+       return cpu;
diff --git a/queue-2.6.32/sched-fix-sched_exec-balancing.patch b/queue-2.6.32/sched-fix-sched_exec-balancing.patch
new file mode 100644 (file)
index 0000000..97f53da
--- /dev/null
@@ -0,0 +1,109 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:05:16 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Wed, 16 Dec 2009 18:04:37 +0100
+Subject: sched: Fix sched_exec() balancing
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <bfd9986c2b980fe52facdd6aee757e8fe1181988.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 3802290628348674985d14914f9bfee7b9084548 upstream
+
+sched: Fix sched_exec() balancing
+
+Since we access ->cpus_allowed without holding rq->lock we need
+a retry loop to validate the result, this comes for near free
+when we merge sched_migrate_task() into sched_exec() since that
+already does the needed check.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091216170517.884743662@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   45 +++++++++++++++++++++++----------------------
+ 1 file changed, 23 insertions(+), 22 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2322,7 +2322,7 @@ void task_oncpu_function_call(struct tas
+  *
+  *  - fork, @p is stable because it isn't on the tasklist yet
+  *
+- *  - exec, @p is unstable XXX
++ *  - exec, @p is unstable, retry loop
+  *
+  *  - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
+  *             we should be good.
+@@ -3133,21 +3133,36 @@ static void double_rq_unlock(struct rq *
+ }
+ /*
+- * If dest_cpu is allowed for this process, migrate the task to it.
+- * This is accomplished by forcing the cpu_allowed mask to only
+- * allow dest_cpu, which will force the cpu onto dest_cpu. Then
+- * the cpu_allowed mask is restored.
++ * sched_exec - execve() is a valuable balancing opportunity, because at
++ * this point the task has the smallest effective memory and cache footprint.
+  */
+-static void sched_migrate_task(struct task_struct *p, int dest_cpu)
++void sched_exec(void)
+ {
++      struct task_struct *p = current;
+       struct migration_req req;
++      int dest_cpu, this_cpu;
+       unsigned long flags;
+       struct rq *rq;
++again:
++      this_cpu = get_cpu();
++      dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
++      if (dest_cpu == this_cpu) {
++              put_cpu();
++              return;
++      }
++
+       rq = task_rq_lock(p, &flags);
++      put_cpu();
++
++      /*
++       * select_task_rq() can race against ->cpus_allowed
++       */
+       if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
+-          || unlikely(!cpu_active(dest_cpu)))
+-              goto out;
++          || unlikely(!cpu_active(dest_cpu))) {
++              task_rq_unlock(rq, &flags);
++              goto again;
++      }
+       /* force the process onto the specified CPU */
+       if (migrate_task(p, dest_cpu, &req)) {
+@@ -3162,24 +3177,10 @@ static void sched_migrate_task(struct ta
+               return;
+       }
+-out:
+       task_rq_unlock(rq, &flags);
+ }
+ /*
+- * sched_exec - execve() is a valuable balancing opportunity, because at
+- * this point the task has the smallest effective memory and cache footprint.
+- */
+-void sched_exec(void)
+-{
+-      int new_cpu, this_cpu = get_cpu();
+-      new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
+-      put_cpu();
+-      if (new_cpu != this_cpu)
+-              sched_migrate_task(current, new_cpu);
+-}
+-
+-/*
+  * pull_task - move a task from a remote runqueue to the local runqueue.
+  * Both runqueues must be locked.
+  */
diff --git a/queue-2.6.32/sched-fix-select_task_rq-vs-hotplug-issues.patch b/queue-2.6.32/sched-fix-select_task_rq-vs-hotplug-issues.patch
new file mode 100644 (file)
index 0000000..e6c57e1
--- /dev/null
@@ -0,0 +1,132 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:05:27 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Wed, 16 Dec 2009 18:04:38 +0100
+Subject: sched: Fix select_task_rq() vs hotplug issues
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <fa72acd9492cec1ea5534da3dec42da537dce92c.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 5da9a0fb673a0ea0a093862f95f6b89b3390c31e upstream
+
+Since select_task_rq() is now responsible for guaranteeing
+->cpus_allowed and cpu_active_mask, we need to verify this.
+
+select_task_rq_rt() can blindly return
+smp_processor_id()/task_cpu() without checking the valid masks,
+select_task_rq_fair() can do the same in the rare case that all
+SD_flags are disabled.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091216170517.961475466@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   76 ++++++++++++++++++++++++++++++---------------------------
+ 1 file changed, 40 insertions(+), 36 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2317,6 +2317,43 @@ void task_oncpu_function_call(struct tas
+ }
+ #ifdef CONFIG_SMP
++static int select_fallback_rq(int cpu, struct task_struct *p)
++{
++      int dest_cpu;
++      const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
++
++      /* Look for allowed, online CPU in same node. */
++      for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
++              if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
++                      return dest_cpu;
++
++      /* Any allowed, online CPU? */
++      dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
++      if (dest_cpu < nr_cpu_ids)
++              return dest_cpu;
++
++      /* No more Mr. Nice Guy. */
++      if (dest_cpu >= nr_cpu_ids) {
++              rcu_read_lock();
++              cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
++              rcu_read_unlock();
++              dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
++
++              /*
++               * Don't tell them about moving exiting tasks or
++               * kernel threads (both mm NULL), since they never
++               * leave kernel.
++               */
++              if (p->mm && printk_ratelimit()) {
++                      printk(KERN_INFO "process %d (%s) no "
++                             "longer affine to cpu%d\n",
++                             task_pid_nr(p), p->comm, cpu);
++              }
++      }
++
++      return dest_cpu;
++}
++
+ /*
+  * Called from:
+  *
+@@ -2343,14 +2380,8 @@ int select_task_rq(struct task_struct *p
+        *   not worry about this generic constraint ]
+        */
+       if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+-                   !cpu_active(cpu))) {
+-
+-              cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+-              /*
+-               * XXX: race against hot-plug modifying cpu_active_mask
+-               */
+-              BUG_ON(cpu >= nr_cpu_ids);
+-      }
++                   !cpu_active(cpu)))
++              cpu = select_fallback_rq(task_cpu(p), p);
+       return cpu;
+ }
+@@ -7352,37 +7383,10 @@ static int __migrate_task_irq(struct tas
+ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+ {
+       int dest_cpu;
+-      const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
+ again:
+-      /* Look for allowed, online CPU in same node. */
+-      for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+-              if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+-                      goto move;
+-
+-      /* Any allowed, online CPU? */
+-      dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+-      if (dest_cpu < nr_cpu_ids)
+-              goto move;
+-
+-      /* No more Mr. Nice Guy. */
+-      if (dest_cpu >= nr_cpu_ids) {
+-              cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+-              dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+-
+-              /*
+-               * Don't tell them about moving exiting tasks or
+-               * kernel threads (both mm NULL), since they never
+-               * leave kernel.
+-               */
+-              if (p->mm && printk_ratelimit()) {
+-                      printk(KERN_INFO "process %d (%s) no "
+-                             "longer affine to cpu%d\n",
+-                             task_pid_nr(p), p->comm, dead_cpu);
+-              }
+-      }
++      dest_cpu = select_fallback_rq(dead_cpu, p);
+-move:
+       /* It can have affinity changed while we were choosing. */
+       if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
+               goto again;
diff --git a/queue-2.6.32/sched-fix-set_cpu_active-in-cpu_down.patch b/queue-2.6.32/sched-fix-set_cpu_active-in-cpu_down.patch
new file mode 100644 (file)
index 0000000..8a9bc7e
--- /dev/null
@@ -0,0 +1,86 @@
+From dfeng@redhat.com  Thu Sep 16 14:04:03 2010
+From: Xiaotian Feng <dfeng@redhat.com>
+Date: Wed, 16 Dec 2009 18:04:32 +0100
+Subject: sched: Fix set_cpu_active() in cpu_down()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <e4e7681ad5248791735729ae6a9a2cf4a2b9368b.1283514307.git.efault@gmx.de>
+
+From: Xiaotian Feng <dfeng@redhat.com>
+
+commit 9ee349ad6d326df3633d43f54202427295999c47 upstream
+
+Sachin found cpu hotplug test failures on powerpc, which made
+the kernel hang on his POWER box.
+
+The problem is that we fail to re-activate a cpu when a
+hot-unplug fails. Fix this by moving the de-activation into
+_cpu_down after doing the initial checks.
+
+Remove the synchronize_sched() calls and rely on those implied
+by rebuilding the sched domains using the new mask.
+
+Reported-by: Sachin Sant <sachinp@in.ibm.com>
+Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
+Tested-by: Sachin Sant <sachinp@in.ibm.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091216170517.500272612@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/cpu.c |   24 +++---------------------
+ 1 file changed, 3 insertions(+), 21 deletions(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -209,6 +209,7 @@ static int __ref _cpu_down(unsigned int
+               return -ENOMEM;
+       cpu_hotplug_begin();
++      set_cpu_active(cpu, false);
+       err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
+                                       hcpu, -1, &nr_calls);
+       if (err == NOTIFY_BAD) {
+@@ -280,18 +281,6 @@ int __ref cpu_down(unsigned int cpu)
+               goto out;
+       }
+-      set_cpu_active(cpu, false);
+-
+-      /*
+-       * Make sure the all cpus did the reschedule and are not
+-       * using stale version of the cpu_active_mask.
+-       * This is not strictly necessary becuase stop_machine()
+-       * that we run down the line already provides the required
+-       * synchronization. But it's really a side effect and we do not
+-       * want to depend on the innards of the stop_machine here.
+-       */
+-      synchronize_sched();
+-
+       err = _cpu_down(cpu, 0);
+ out:
+@@ -382,19 +371,12 @@ int disable_nonboot_cpus(void)
+               return error;
+       cpu_maps_update_begin();
+       first_cpu = cpumask_first(cpu_online_mask);
+-      /* We take down all of the non-boot CPUs in one shot to avoid races
++      /*
++       * We take down all of the non-boot CPUs in one shot to avoid races
+        * with the userspace trying to use the CPU hotplug at the same time
+        */
+       cpumask_clear(frozen_cpus);
+-      for_each_online_cpu(cpu) {
+-              if (cpu == first_cpu)
+-                      continue;
+-              set_cpu_active(cpu, false);
+-      }
+-
+-      synchronize_sched();
+-
+       printk("Disabling non-boot CPUs ...\n");
+       for_each_online_cpu(cpu) {
+               if (cpu == first_cpu)
diff --git a/queue-2.6.32/sched-make-wakeup-side-and-atomic-variants-of-completion-api-irq-safe.patch b/queue-2.6.32/sched-make-wakeup-side-and-atomic-variants-of-completion-api-irq-safe.patch
new file mode 100644 (file)
index 0000000..ac058e2
--- /dev/null
@@ -0,0 +1,72 @@
+From rjw@sisk.pl  Thu Sep 16 14:03:08 2010
+From: Rafael J.Wysocki <rjw@sisk.pl>
+Date: Sun, 13 Dec 2009 00:07:30 +0100
+Subject: sched: Make wakeup side and atomic variants of completion API irq safe
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <09c3ea5b3483bec5c4181b9dfd61b5da6b15969b.1283514306.git.efault@gmx.de>
+
+From: Rafael J.Wysocki <rjw@sisk.pl>
+
+commit 7539a3b3d1f892dd97eaf094134d7de55c13befe upstream
+
+Alan Stern noticed that all the wakeup side (and atomic) variants of the
+completion APIs should be irq safe, but the newly introduced
+completion_done() and try_wait_for_completion() aren't. The use of the
+irq unsafe variants in IRQ contexts can cause crashes/hangs.
+
+Fix the problem by making them use spin_lock_irqsave() and
+spin_lock_irqrestore().
+
+Reported-by: Alan Stern <stern@rowland.harvard.edu>
+Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Zhang Rui <rui.zhang@intel.com>
+Cc: pm list <linux-pm@lists.linux-foundation.org>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: David Chinner <david@fromorbit.com>
+Cc: Lachlan McIlroy <lachlan@sgi.com>
+LKML-Reference: <200912130007.30541.rjw@sisk.pl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -5947,14 +5947,15 @@ EXPORT_SYMBOL(wait_for_completion_killab
+  */
+ bool try_wait_for_completion(struct completion *x)
+ {
++      unsigned long flags;
+       int ret = 1;
+-      spin_lock_irq(&x->wait.lock);
++      spin_lock_irqsave(&x->wait.lock, flags);
+       if (!x->done)
+               ret = 0;
+       else
+               x->done--;
+-      spin_unlock_irq(&x->wait.lock);
++      spin_unlock_irqrestore(&x->wait.lock, flags);
+       return ret;
+ }
+ EXPORT_SYMBOL(try_wait_for_completion);
+@@ -5969,12 +5970,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
+  */
+ bool completion_done(struct completion *x)
+ {
++      unsigned long flags;
+       int ret = 1;
+-      spin_lock_irq(&x->wait.lock);
++      spin_lock_irqsave(&x->wait.lock, flags);
+       if (!x->done)
+               ret = 0;
+-      spin_unlock_irq(&x->wait.lock);
++      spin_unlock_irqrestore(&x->wait.lock, flags);
+       return ret;
+ }
+ EXPORT_SYMBOL(completion_done);
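
As a usage sketch of why the irqsave variants matter (illustrative only; the demo_ driver names are made up and not taken from the patch), a hard-IRQ call site now looks roughly like this:

    #include <linux/completion.h>
    #include <linux/interrupt.h>

    static DECLARE_COMPLETION(demo_buf_ready);
    static unsigned long demo_consumed;

    static irqreturn_t demo_irq_handler(int irq, void *dev_id)
    {
            /*
             * try_wait_for_completion() and completion_done() take
             * x->wait.lock; with spin_lock_irqsave()/irqrestore() they no
             * longer unconditionally re-enable interrupts on unlock, so
             * calling them here, with interrupts already disabled, is safe.
             */
            if (try_wait_for_completion(&demo_buf_ready))
                    demo_consumed++;        /* consume one signalled buffer */

            return IRQ_HANDLED;
    }

    /* Producer side, process context. */
    static void demo_buffer_filled(void)
    {
            complete(&demo_buf_ready);
    }
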
diff --git a/queue-2.6.32/sched-make-warning-less-noisy.patch b/queue-2.6.32/sched-make-warning-less-noisy.patch
new file mode 100644 (file)
index 0000000..d1afe31
--- /dev/null
@@ -0,0 +1,33 @@
+From mingo@elte.hu  Thu Sep 16 14:04:48 2010
+From: Ingo Molnar <mingo@elte.hu>
+Date: Thu, 17 Dec 2009 06:05:49 +0100
+Subject: sched: Make warning less noisy
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <d51af6ab86e11ff3f358f84d29778221b03815a4.1283514307.git.efault@gmx.de>
+
+From: Ingo Molnar <mingo@elte.hu>
+
+commit 416eb39556a03d1c7e52b0791e9052ccd71db241 upstream
+
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091216170517.807938893@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2071,7 +2071,7 @@ void set_task_cpu(struct task_struct *p,
+        * We should never call set_task_cpu() on a blocked task,
+        * ttwu() will sort out the placement.
+        */
+-      WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING);
++      WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING);
+ #endif
+       trace_sched_migrate_task(p, new_cpu);
diff --git a/queue-2.6.32/sched-protect-sched_rr_get_param-access-to-task-sched_class.patch b/queue-2.6.32/sched-protect-sched_rr_get_param-access-to-task-sched_class.patch
new file mode 100644 (file)
index 0000000..b4ae27f
--- /dev/null
@@ -0,0 +1,116 @@
+From tglx@linutronix.de  Thu Sep 16 14:01:34 2010
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 9 Dec 2009 09:32:03 +0100
+Subject: sched: Protect sched_rr_get_param() access to task->sched_class
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <5b6c8ee25a7849df728e34620f6e39a71dd0ba56.1283514306.git.efault@gmx.de>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit dba091b9e3522b9d32fc9975e48d3b69633b45f0 upstream
+
+sched_rr_get_param calls
+task->sched_class->get_rr_interval(task) without protection
+against a concurrent sched_setscheduler() call which modifies
+task->sched_class.
+
+Serialize the access with task_rq_lock(task) and hand the rq
+pointer into get_rr_interval() as it's needed at least in the
+sched_fair implementation.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+LKML-Reference: <alpine.LFD.2.00.0912090930120.3089@localhost.localdomain>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h   |    3 ++-
+ kernel/sched.c          |    6 +++++-
+ kernel/sched_fair.c     |    6 +-----
+ kernel/sched_idletask.c |    2 +-
+ kernel/sched_rt.c       |    2 +-
+ 5 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1114,7 +1114,8 @@ struct sched_class {
+       void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+                            int oldprio, int running);
+-      unsigned int (*get_rr_interval) (struct task_struct *task);
++      unsigned int (*get_rr_interval) (struct rq *rq,
++                                       struct task_struct *task);
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+       void (*moved_group) (struct task_struct *p);
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6946,6 +6946,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p
+ {
+       struct task_struct *p;
+       unsigned int time_slice;
++      unsigned long flags;
++      struct rq *rq;
+       int retval;
+       struct timespec t;
+@@ -6962,7 +6964,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p
+       if (retval)
+               goto out_unlock;
+-      time_slice = p->sched_class->get_rr_interval(p);
++      rq = task_rq_lock(p, &flags);
++      time_slice = p->sched_class->get_rr_interval(rq, p);
++      task_rq_unlock(rq, &flags);
+       read_unlock(&tasklist_lock);
+       jiffies_to_timespec(time_slice, &t);
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -2003,21 +2003,17 @@ static void moved_group_fair(struct task
+ }
+ #endif
+-unsigned int get_rr_interval_fair(struct task_struct *task)
++unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
+ {
+       struct sched_entity *se = &task->se;
+-      unsigned long flags;
+-      struct rq *rq;
+       unsigned int rr_interval = 0;
+       /*
+        * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
+        * idle runqueue:
+        */
+-      rq = task_rq_lock(task, &flags);
+       if (rq->cfs.load.weight)
+               rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+-      task_rq_unlock(rq, &flags);
+       return rr_interval;
+ }
+--- a/kernel/sched_idletask.c
++++ b/kernel/sched_idletask.c
+@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq
+               check_preempt_curr(rq, p, 0);
+ }
+-unsigned int get_rr_interval_idle(struct task_struct *task)
++unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
+ {
+       return 0;
+ }
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -1734,7 +1734,7 @@ static void set_curr_task_rt(struct rq *
+       dequeue_pushable_task(rq, p);
+ }
+-unsigned int get_rr_interval_rt(struct task_struct *task)
++unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
+ {
+       /*
+        * Time slice is 0 for SCHED_FIFO tasks
diff --git a/queue-2.6.32/sched-protect-task-cpus_allowed-access-in-sched_getaffinity.patch b/queue-2.6.32/sched-protect-task-cpus_allowed-access-in-sched_getaffinity.patch
new file mode 100644 (file)
index 0000000..03bcdf3
--- /dev/null
@@ -0,0 +1,50 @@
+From tglx@linutronix.de  Thu Sep 16 14:00:26 2010
+Message-Id: <8a8f42c974ba851cdca56d2243ed403812e6e994.1283514306.git.efault@gmx.de>
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 8 Dec 2009 20:24:16 +0000
+Subject: sched: Protect task->cpus_allowed access in sched_getaffinity()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>,
+       Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 3160568371da441b7f2fb57f2f1225404207e8f2 upstream
+
+sched_getaffinity() is not protected against a concurrent
+modification of the tasks affinity.
+
+Serialize the access with task_rq_lock(task).
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+LKML-Reference: <20091208202026.769251187@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6686,6 +6686,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t
+ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ {
+       struct task_struct *p;
++      unsigned long flags;
++      struct rq *rq;
+       int retval;
+       get_online_cpus();
+@@ -6700,7 +6702,9 @@ long sched_getaffinity(pid_t pid, struct
+       if (retval)
+               goto out_unlock;
++      rq = task_rq_lock(p, &flags);
+       cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
++      task_rq_unlock(rq, &flags);
+ out_unlock:
+       read_unlock(&tasklist_lock);
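
For reference, the userspace interface served by this path (illustrative only, not part of the patch); the cpumask_and() above is what fills the mask returned here:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            cpu_set_t mask;
            int cpu;

            /* pid 0 queries the calling task's affinity mask. */
            if (sched_getaffinity(0, sizeof(mask), &mask) != 0) {
                    perror("sched_getaffinity");
                    return 1;
            }

            for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
                    if (CPU_ISSET(cpu, &mask))
                            printf("allowed on cpu%d\n", cpu);

            return 0;
    }
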
diff --git a/queue-2.6.32/sched-remove-forced2_migrations-stats.patch b/queue-2.6.32/sched-remove-forced2_migrations-stats.patch
new file mode 100644 (file)
index 0000000..7c40b17
--- /dev/null
@@ -0,0 +1,89 @@
+From mingo@elte.hu  Thu Sep 16 14:02:52 2010
+From: Ingo Molnar <mingo@elte.hu>
+Date: Thu, 10 Dec 2009 20:32:39 +0100
+Subject: sched: Remove forced2_migrations stats
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <9d72ed88de455fe6e14baed99ab4b05d6a047ea5.1283514306.git.efault@gmx.de>
+
+From: Ingo Molnar <mingo@elte.hu>
+
+commit b9889ed1ddeca5a3f3569c8de7354e9e97d803ae upstream
+
+This build warning:
+
+ kernel/sched.c: In function 'set_task_cpu':
+ kernel/sched.c:2070: warning: unused variable 'old_rq'
+
+Made me realize that the forced2_migrations stat looks pretty
+pointless (and a misnomer) - remove it.
+
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h |    1 -
+ kernel/sched.c        |    6 ------
+ kernel/sched_debug.c  |    2 --
+ 3 files changed, 9 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1178,7 +1178,6 @@ struct sched_entity {
+       u64                     nr_failed_migrations_running;
+       u64                     nr_failed_migrations_hot;
+       u64                     nr_forced_migrations;
+-      u64                     nr_forced2_migrations;
+       u64                     nr_wakeups;
+       u64                     nr_wakeups_sync;
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2069,7 +2069,6 @@ task_hot(struct task_struct *p, u64 now,
+ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+ {
+       int old_cpu = task_cpu(p);
+-      struct rq *old_rq = cpu_rq(old_cpu);
+       struct cfs_rq *old_cfsrq = task_cfs_rq(p),
+                     *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+@@ -2077,10 +2076,6 @@ void set_task_cpu(struct task_struct *p,
+       if (old_cpu != new_cpu) {
+               p->se.nr_migrations++;
+-#ifdef CONFIG_SCHEDSTATS
+-              if (task_hot(p, old_rq->clock, NULL))
+-                      schedstat_inc(p, se.nr_forced2_migrations);
+-#endif
+               perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+                                    1, 1, NULL, 0);
+       }
+@@ -2523,7 +2518,6 @@ static void __sched_fork(struct task_str
+       p->se.nr_failed_migrations_running      = 0;
+       p->se.nr_failed_migrations_hot          = 0;
+       p->se.nr_forced_migrations              = 0;
+-      p->se.nr_forced2_migrations             = 0;
+       p->se.nr_wakeups                        = 0;
+       p->se.nr_wakeups_sync                   = 0;
+--- a/kernel/sched_debug.c
++++ b/kernel/sched_debug.c
+@@ -423,7 +423,6 @@ void proc_sched_show_task(struct task_st
+       P(se.nr_failed_migrations_running);
+       P(se.nr_failed_migrations_hot);
+       P(se.nr_forced_migrations);
+-      P(se.nr_forced2_migrations);
+       P(se.nr_wakeups);
+       P(se.nr_wakeups_sync);
+       P(se.nr_wakeups_migrate);
+@@ -499,7 +498,6 @@ void proc_sched_set_task(struct task_str
+       p->se.nr_failed_migrations_running      = 0;
+       p->se.nr_failed_migrations_hot          = 0;
+       p->se.nr_forced_migrations              = 0;
+-      p->se.nr_forced2_migrations             = 0;
+       p->se.nr_wakeups                        = 0;
+       p->se.nr_wakeups_sync                   = 0;
+       p->se.nr_wakeups_migrate                = 0;
diff --git a/queue-2.6.32/sched-remove-rq-clock-coupling-from-set_task_cpu.patch b/queue-2.6.32/sched-remove-rq-clock-coupling-from-set_task_cpu.patch
new file mode 100644 (file)
index 0000000..b7126cc
--- /dev/null
@@ -0,0 +1,59 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:02:16 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 27 Nov 2009 14:12:25 +0100
+Subject: sched: Remove rq->clock coupling from set_task_cpu()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <345ac4629b04cc360d22cc1ed26bb8986930d383.1283514306.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 5afcdab706d6002cb02b567ba46e650215e694e8 upstream
+
+set_task_cpu() should be rq invariant and only touch task state, it
+currently fails to do so, which opens up a few races, since not all
+callers hold both rq->locks.
+
+Remove the reliance on rq->clock, as any site calling set_task_cpu()
+should also do a remote clock update, which should ensure the observed
+time between these two cpus is monotonic, as per
+kernel/sched_clock.c:sched_clock_remote().
+
+Therefore we can simply remove the clock_offset bits and be happy.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   13 +------------
+ 1 file changed, 1 insertion(+), 12 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2069,23 +2069,12 @@ task_hot(struct task_struct *p, u64 now,
+ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+ {
+       int old_cpu = task_cpu(p);
+-      struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
++      struct rq *old_rq = cpu_rq(old_cpu);
+       struct cfs_rq *old_cfsrq = task_cfs_rq(p),
+                     *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+-      u64 clock_offset;
+-
+-      clock_offset = old_rq->clock - new_rq->clock;
+       trace_sched_migrate_task(p, new_cpu);
+-#ifdef CONFIG_SCHEDSTATS
+-      if (p->se.wait_start)
+-              p->se.wait_start -= clock_offset;
+-      if (p->se.sleep_start)
+-              p->se.sleep_start -= clock_offset;
+-      if (p->se.block_start)
+-              p->se.block_start -= clock_offset;
+-#endif
+       if (old_cpu != new_cpu) {
+               p->se.nr_migrations++;
+ #ifdef CONFIG_SCHEDSTATS
diff --git a/queue-2.6.32/sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch b/queue-2.6.32/sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch
new file mode 100644 (file)
index 0000000..bda820c
--- /dev/null
@@ -0,0 +1,222 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:05:50 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Wed, 16 Dec 2009 18:04:41 +0100
+Subject: sched: Remove the cfs_rq dependency from set_task_cpu()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <2b12c4cd8419eba24436eeca57930b6a84f787b6.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 88ec22d3edb72b261f8628226cd543589a6d5e1b upstream
+
+In order to remove the cfs_rq dependency from set_task_cpu() we
+need to ensure the task is cfs_rq invariant for all callsites.
+
+The simple approach is to subtract cfs_rq->min_vruntime from
+se->vruntime on dequeue, and add cfs_rq->min_vruntime on
+enqueue.
+
+However, this has the downside of breaking FAIR_SLEEPERS since
+we lose the old vruntime as we only maintain the relative
+position.
+
+To solve this, we observe that we only migrate runnable tasks;
+we do this using deactivate_task(.sleep=0) and
+activate_task(.wakeup=0), so we can restrict the
+min_vruntime invariance to that state.
+
+The only other case is wakeup balancing: since we want to
+maintain the old vruntime we cannot make it relative on dequeue,
+but since we don't migrate inactive tasks, we can do so right
+before we activate it again.
+
+This is where we need the new pre-wakeup hook; we need to call
+it while still holding the old rq->lock. We could fold it into
+->select_task_rq(), but since that has multiple callsites and
+would obfuscate the locking requirements, that seems like a
+fudge.
+
+This leaves the fork() case: simply make sure that ->task_fork()
+leaves the ->vruntime in a relative state.
+
+This covers all cases where set_task_cpu() gets called, and
+ensures it sees a relative vruntime.
+
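+As a rough illustration (plain userspace C with made-up min_vruntime
+values; not part of the patch itself), the arithmetic only preserves
+the task's lag relative to whichever cfs_rq it sits on, which is all
+a migration needs:
+
+        #include <stdio.h>
+
+        int main(void)
+        {
+                unsigned long long old_min = 1000000, new_min = 4000000;
+                unsigned long long vruntime = 1002500; /* absolute on the old cfs_rq */
+
+                vruntime -= old_min;  /* dequeue: becomes relative (2500) */
+                /* set_task_cpu() would run here and needs no cfs_rq */
+                vruntime += new_min;  /* enqueue: absolute on the new cfs_rq */
+
+                printf("lag on the new queue: %llu\n", vruntime - new_min);
+                return 0;
+        }
+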
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091216170518.191697025@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h |    2 +-
+ kernel/sched.c        |    6 +-----
+ kernel/sched_fair.c   |   50 ++++++++++++++++++++++++++++++++++++++++++++------
+ 3 files changed, 46 insertions(+), 12 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1118,7 +1118,7 @@ struct sched_class {
+                                        struct task_struct *task);
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+-      void (*moved_group) (struct task_struct *p);
++      void (*moved_group) (struct task_struct *p, int on_rq);
+ #endif
+ };
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2063,8 +2063,6 @@ task_hot(struct task_struct *p, u64 now,
+ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+ {
+       int old_cpu = task_cpu(p);
+-      struct cfs_rq *old_cfsrq = task_cfs_rq(p),
+-                    *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+ #ifdef CONFIG_SCHED_DEBUG
+       /*
+@@ -2082,8 +2080,6 @@ void set_task_cpu(struct task_struct *p,
+               perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+                                    1, 1, NULL, 0);
+       }
+-      p->se.vruntime -= old_cfsrq->min_vruntime -
+-                                       new_cfsrq->min_vruntime;
+       __set_task_cpu(p, new_cpu);
+ }
+@@ -10144,7 +10140,7 @@ void sched_move_task(struct task_struct
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+       if (tsk->sched_class->moved_group)
+-              tsk->sched_class->moved_group(tsk);
++              tsk->sched_class->moved_group(tsk, on_rq);
+ #endif
+       if (unlikely(running))
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -488,6 +488,7 @@ __update_curr(struct cfs_rq *cfs_rq, str
+       curr->sum_exec_runtime += delta_exec;
+       schedstat_add(cfs_rq, exec_clock, delta_exec);
+       delta_exec_weighted = calc_delta_fair(delta_exec, curr);
++
+       curr->vruntime += delta_exec_weighted;
+       update_min_vruntime(cfs_rq);
+ }
+@@ -743,16 +744,26 @@ place_entity(struct cfs_rq *cfs_rq, stru
+       se->vruntime = vruntime;
+ }
++#define ENQUEUE_WAKEUP        1
++#define ENQUEUE_MIGRATE 2
++
+ static void
+-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
++enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ {
+       /*
++       * Update the normalized vruntime before updating min_vruntime
++       * through callig update_curr().
++       */
++      if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
++              se->vruntime += cfs_rq->min_vruntime;
++
++      /*
+        * Update run-time statistics of the 'current'.
+        */
+       update_curr(cfs_rq);
+       account_entity_enqueue(cfs_rq, se);
+-      if (wakeup) {
++      if (flags & ENQUEUE_WAKEUP) {
+               place_entity(cfs_rq, se, 0);
+               enqueue_sleeper(cfs_rq, se);
+       }
+@@ -806,6 +817,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
+               __dequeue_entity(cfs_rq, se);
+       account_entity_dequeue(cfs_rq, se);
+       update_min_vruntime(cfs_rq);
++
++      /*
++       * Normalize the entity after updating the min_vruntime because the
++       * update can refer to the ->curr item and we need to reflect this
++       * movement in our normalized position.
++       */
++      if (!sleep)
++              se->vruntime -= cfs_rq->min_vruntime;
+ }
+ /*
+@@ -1016,13 +1035,19 @@ static void enqueue_task_fair(struct rq
+ {
+       struct cfs_rq *cfs_rq;
+       struct sched_entity *se = &p->se;
++      int flags = 0;
++
++      if (wakeup)
++              flags |= ENQUEUE_WAKEUP;
++      if (p->state == TASK_WAKING)
++              flags |= ENQUEUE_MIGRATE;
+       for_each_sched_entity(se) {
+               if (se->on_rq)
+                       break;
+               cfs_rq = cfs_rq_of(se);
+-              enqueue_entity(cfs_rq, se, wakeup);
+-              wakeup = 1;
++              enqueue_entity(cfs_rq, se, flags);
++              flags = ENQUEUE_WAKEUP;
+       }
+       hrtick_update(rq);
+@@ -1098,6 +1123,14 @@ static void yield_task_fair(struct rq *r
+ #ifdef CONFIG_SMP
++static void task_waking_fair(struct rq *rq, struct task_struct *p)
++{
++      struct sched_entity *se = &p->se;
++      struct cfs_rq *cfs_rq = cfs_rq_of(se);
++
++      se->vruntime -= cfs_rq->min_vruntime;
++}
++
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ /*
+  * effective_load() calculates the load change as seen from the root_task_group
+@@ -1943,6 +1976,8 @@ static void task_fork_fair(struct task_s
+               resched_task(rq->curr);
+       }
++      se->vruntime -= cfs_rq->min_vruntime;
++
+       spin_unlock_irqrestore(&rq->lock, flags);
+ }
+@@ -1996,12 +2031,13 @@ static void set_curr_task_fair(struct rq
+ }
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+-static void moved_group_fair(struct task_struct *p)
++static void moved_group_fair(struct task_struct *p, int on_rq)
+ {
+       struct cfs_rq *cfs_rq = task_cfs_rq(p);
+       update_curr(cfs_rq);
+-      place_entity(cfs_rq, &p->se, 1);
++      if (!on_rq)
++              place_entity(cfs_rq, &p->se, 1);
+ }
+ #endif
+@@ -2041,6 +2077,8 @@ static const struct sched_class fair_sch
+       .move_one_task          = move_one_task_fair,
+       .rq_online              = rq_online_fair,
+       .rq_offline             = rq_offline_fair,
++
++      .task_waking            = task_waking_fair,
+ #endif
+       .set_curr_task          = set_curr_task_fair,
diff --git a/queue-2.6.32/sched-remove-unused-cpu_nr_migrations.patch b/queue-2.6.32/sched-remove-unused-cpu_nr_migrations.patch
new file mode 100644 (file)
index 0000000..fb922e3
--- /dev/null
@@ -0,0 +1,69 @@
+From h-shimamoto@ct.jp.nec.com  Thu Sep 16 14:02:02 2010
+From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
+Date: Wed, 4 Nov 2009 16:16:54 +0900
+Subject: sched: Remove unused cpu_nr_migrations()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <2138ef8909e9abf67502990b21f16d6f078ed83f.1283514306.git.efault@gmx.de>
+
+From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
+
+commit 9824a2b728b63e7ff586b9fd9293c819be79f0f3 upstream
+
+cpu_nr_migrations() is not used; remove it.
+
+Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
+Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <4AF12A66.6020609@ct.jp.nec.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h |    1 -
+ kernel/sched.c        |   11 -----------
+ 2 files changed, 12 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void)
+ extern void calc_global_load(void);
+-extern u64 cpu_nr_migrations(int cpu);
+ extern unsigned long get_parent_ip(unsigned long addr);
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -542,7 +542,6 @@ struct rq {
+       struct load_weight load;
+       unsigned long nr_load_updates;
+       u64 nr_switches;
+-      u64 nr_migrations_in;
+       struct cfs_rq cfs;
+       struct rt_rq rt;
+@@ -2089,7 +2088,6 @@ void set_task_cpu(struct task_struct *p,
+ #endif
+       if (old_cpu != new_cpu) {
+               p->se.nr_migrations++;
+-              new_rq->nr_migrations_in++;
+ #ifdef CONFIG_SCHEDSTATS
+               if (task_hot(p, old_rq->clock, NULL))
+                       schedstat_inc(p, se.nr_forced2_migrations);
+@@ -3048,15 +3046,6 @@ static void calc_load_account_active(str
+ }
+ /*
+- * Externally visible per-cpu scheduler statistics:
+- * cpu_nr_migrations(cpu) - number of migrations into that cpu
+- */
+-u64 cpu_nr_migrations(int cpu)
+-{
+-      return cpu_rq(cpu)->nr_migrations_in;
+-}
+-
+-/*
+  * Update rq->cpu_load[] statistics. This function is usually called every
+  * scheduler tick (TICK_NSEC).
+  */
diff --git a/queue-2.6.32/sched-sanitize-fork-handling.patch b/queue-2.6.32/sched-sanitize-fork-handling.patch
new file mode 100644 (file)
index 0000000..1a9aeb6
--- /dev/null
@@ -0,0 +1,178 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:02:42 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 27 Nov 2009 17:32:46 +0100
+Subject: sched: Sanitize fork() handling
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <813f92ca73a6ad9adc923c0d8b5bc346429452d4.1283514306.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit cd29fe6f2637cc2ccbda5ac65f5332d6bf5fa3c6 upstream
+
+Currently we try to do task placement in wake_up_new_task() after we do
+the load-balance pass in sched_fork(). This yields complicated semantics
+in that we have to deal with tasks on different RQs and the
+set_task_cpu() calls in copy_process() and sched_fork().
+
+Rename ->task_new() to ->task_fork() and call it from sched_fork()
+before the balancing; this gives the policy a clear point to place the
+task.
+
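+In outline (a condensed sketch of the resulting flow, not literal
+kernel code), placement now happens entirely at fork time:
+
+        sched_fork(p):
+                __sched_fork(p);
+                ...
+                if (p->sched_class->task_fork)
+                        p->sched_class->task_fork(p);   /* policy places the task */
+                cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+                ...
+
+        wake_up_new_task(p):
+                activate_task(rq, p, 0);        /* no ->task_new() special case left */
+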
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h |    2 +-
+ kernel/sched.c        |   43 ++++++++++++++++++-------------------------
+ kernel/sched_fair.c   |   28 +++++++++++++++-------------
+ 3 files changed, 34 insertions(+), 39 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1104,7 +1104,7 @@ struct sched_class {
+       void (*set_curr_task) (struct rq *rq);
+       void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
+-      void (*task_new) (struct rq *rq, struct task_struct *p);
++      void (*task_fork) (struct task_struct *p);
+       void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+                              int running);
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -1821,6 +1821,20 @@ static void cfs_rq_set_shares(struct cfs
+ static void calc_load_account_active(struct rq *this_rq);
+ static void update_sysctl(void);
++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
++{
++      set_task_rq(p, cpu);
++#ifdef CONFIG_SMP
++      /*
++       * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
++       * successfuly executed on another CPU. We must ensure that updates of
++       * per-task data have been completed by this moment.
++       */
++      smp_wmb();
++      task_thread_info(p)->cpu = cpu;
++#endif
++}
++
+ #include "sched_stats.h"
+ #include "sched_idletask.c"
+ #include "sched_fair.c"
+@@ -1977,20 +1991,6 @@ inline int task_curr(const struct task_s
+       return cpu_curr(task_cpu(p)) == p;
+ }
+-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+-{
+-      set_task_rq(p, cpu);
+-#ifdef CONFIG_SMP
+-      /*
+-       * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+-       * successfuly executed on another CPU. We must ensure that updates of
+-       * per-task data have been completed by this moment.
+-       */
+-      smp_wmb();
+-      task_thread_info(p)->cpu = cpu;
+-#endif
+-}
+-
+ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+                                      const struct sched_class *prev_class,
+                                      int oldprio, int running)
+@@ -2593,6 +2593,9 @@ void sched_fork(struct task_struct *p, i
+       if (!rt_prio(p->prio))
+               p->sched_class = &fair_sched_class;
++      if (p->sched_class->task_fork)
++              p->sched_class->task_fork(p);
++
+ #ifdef CONFIG_SMP
+       cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+ #endif
+@@ -2629,17 +2632,7 @@ void wake_up_new_task(struct task_struct
+       rq = task_rq_lock(p, &flags);
+       BUG_ON(p->state != TASK_RUNNING);
+       update_rq_clock(rq);
+-
+-      if (!p->sched_class->task_new || !current->se.on_rq) {
+-              activate_task(rq, p, 0);
+-      } else {
+-              /*
+-               * Let the scheduling class do new task startup
+-               * management (if any):
+-               */
+-              p->sched_class->task_new(rq, p);
+-              inc_nr_running(rq);
+-      }
++      activate_task(rq, p, 0);
+       trace_sched_wakeup_new(rq, p, 1);
+       check_preempt_curr(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1911,28 +1911,30 @@ static void task_tick_fair(struct rq *rq
+ }
+ /*
+- * Share the fairness runtime between parent and child, thus the
+- * total amount of pressure for CPU stays equal - new tasks
+- * get a chance to run but frequent forkers are not allowed to
+- * monopolize the CPU. Note: the parent runqueue is locked,
+- * the child is not running yet.
++ * called on fork with the child task as argument from the parent's context
++ *  - child not yet on the tasklist
++ *  - preemption disabled
+  */
+-static void task_new_fair(struct rq *rq, struct task_struct *p)
++static void task_fork_fair(struct task_struct *p)
+ {
+-      struct cfs_rq *cfs_rq = task_cfs_rq(p);
++      struct cfs_rq *cfs_rq = task_cfs_rq(current);
+       struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+       int this_cpu = smp_processor_id();
++      struct rq *rq = this_rq();
++      unsigned long flags;
++
++      spin_lock_irqsave(&rq->lock, flags);
+-      sched_info_queued(p);
++      if (unlikely(task_cpu(p) != this_cpu))
++              __set_task_cpu(p, this_cpu);
+       update_curr(cfs_rq);
++
+       if (curr)
+               se->vruntime = curr->vruntime;
+       place_entity(cfs_rq, se, 1);
+-      /* 'curr' will be NULL if the child belongs to a different group */
+-      if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
+-                      curr && entity_before(curr, se)) {
++      if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
+               /*
+                * Upon rescheduling, sched_class::put_prev_task() will place
+                * 'current' within the tree based on its new key value.
+@@ -1941,7 +1943,7 @@ static void task_new_fair(struct rq *rq,
+               resched_task(rq->curr);
+       }
+-      enqueue_task_fair(rq, p, 0);
++      spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ /*
+@@ -2043,7 +2045,7 @@ static const struct sched_class fair_sch
+       .set_curr_task          = set_curr_task_fair,
+       .task_tick              = task_tick_fair,
+-      .task_new               = task_new_fair,
++      .task_fork              = task_fork_fair,
+       .prio_changed           = prio_changed_fair,
+       .switched_to            = switched_to_fair,
diff --git a/queue-2.6.32/sched-use-rcu-in-sched_get-set_affinity.patch b/queue-2.6.32/sched-use-rcu-in-sched_get-set_affinity.patch
new file mode 100644 (file)
index 0000000..0c38609
--- /dev/null
@@ -0,0 +1,77 @@
+From tglx@linutronix.de  Thu Sep 16 14:03:36 2010
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 9 Dec 2009 10:15:01 +0000
+Subject: sched: Use rcu in sched_get/set_affinity()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <9939fdaefe12d123e26bdbf51b8b502aca64ae42.1283514306.git.efault@gmx.de>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 23f5d142519621b16cf2b378cf8adf4dcf01a616 upstream
+
+tasklist_lock is held read-locked to protect the
+find_task_by_vpid() call and to prevent the task from going away.
+sched_setaffinity() acquires a task struct reference and drops the
+tasklist_lock right away. The access to the cpus_allowed mask is
+protected by rq->lock.
+
+rcu_read_lock() provides the same protection here.
+
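+The resulting lookup pattern in sched_setaffinity() (condensed from
+the hunk below) is:
+
+        rcu_read_lock();
+        p = find_process_by_pid(pid);
+        if (!p) {
+                rcu_read_unlock();
+                put_online_cpus();
+                return -ESRCH;
+        }
+        /* Prevent p going away */
+        get_task_struct(p);
+        rcu_read_unlock();
+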
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+LKML-Reference: <20091209100706.789059966@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6563,22 +6563,18 @@ long sched_setaffinity(pid_t pid, const
+       int retval;
+       get_online_cpus();
+-      read_lock(&tasklist_lock);
++      rcu_read_lock();
+       p = find_process_by_pid(pid);
+       if (!p) {
+-              read_unlock(&tasklist_lock);
++              rcu_read_unlock();
+               put_online_cpus();
+               return -ESRCH;
+       }
+-      /*
+-       * It is not safe to call set_cpus_allowed with the
+-       * tasklist_lock held. We will bump the task_struct's
+-       * usage count and then drop tasklist_lock.
+-       */
++      /* Prevent p going away */
+       get_task_struct(p);
+-      read_unlock(&tasklist_lock);
++      rcu_read_unlock();
+       if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+               retval = -ENOMEM;
+@@ -6664,7 +6660,7 @@ long sched_getaffinity(pid_t pid, struct
+       int retval;
+       get_online_cpus();
+-      read_lock(&tasklist_lock);
++      rcu_read_lock();
+       retval = -ESRCH;
+       p = find_process_by_pid(pid);
+@@ -6680,7 +6676,7 @@ long sched_getaffinity(pid_t pid, struct
+       task_rq_unlock(rq, &flags);
+ out_unlock:
+-      read_unlock(&tasklist_lock);
++      rcu_read_unlock();
+       put_online_cpus();
+       return retval;
diff --git a/queue-2.6.32/sched-use-rcu-in-sched_get_rr_param.patch b/queue-2.6.32/sched-use-rcu-in-sched_get_rr_param.patch
new file mode 100644 (file)
index 0000000..8b13a2f
--- /dev/null
@@ -0,0 +1,60 @@
+From tglx@linutronix.de  Thu Sep 16 14:03:50 2010
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 9 Dec 2009 10:15:11 +0000
+Subject: sched: Use rcu in sched_get_rr_param()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <05d6447642d29f8934b54889b15fd010680d3b57.1283514306.git.efault@gmx.de>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 1a551ae715825bb2a2107a2dd68de024a1fa4e32 upstream
+
+read_lock(&tasklist_lock) does not protect
+sys_sched_get_rr_param() against a concurrent update of the
+policy or scheduler parameters, as do_sched_setscheduler() does not
+take the tasklist_lock.
+
+The access to task->sched_class->get_rr_interval is protected by
+task_rq_lock(task).
+
+Use rcu_read_lock() to protect find_task_by_vpid() and prevent
+the task struct from going away.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+LKML-Reference: <20091209100706.862897167@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6924,7 +6924,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p
+               return -EINVAL;
+       retval = -ESRCH;
+-      read_lock(&tasklist_lock);
++      rcu_read_lock();
+       p = find_process_by_pid(pid);
+       if (!p)
+               goto out_unlock;
+@@ -6937,13 +6937,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p
+       time_slice = p->sched_class->get_rr_interval(rq, p);
+       task_rq_unlock(rq, &flags);
+-      read_unlock(&tasklist_lock);
++      rcu_read_unlock();
+       jiffies_to_timespec(time_slice, &t);
+       retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
+       return retval;
+ out_unlock:
+-      read_unlock(&tasklist_lock);
++      rcu_read_unlock();
+       return retval;
+ }
diff --git a/queue-2.6.32/sched-use-rcu-in-sys_sched_getscheduler-sys_sched_getparam.patch b/queue-2.6.32/sched-use-rcu-in-sys_sched_getscheduler-sys_sched_getparam.patch
new file mode 100644 (file)
index 0000000..6da76f5
--- /dev/null
@@ -0,0 +1,80 @@
+From tglx@linutronix.de  Thu Sep 16 14:03:22 2010
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 9 Dec 2009 10:14:58 +0000
+Subject: sched: Use rcu in sys_sched_getscheduler/sys_sched_getparam()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <c487a7334bdb494d68010d6faeb44e4454300ce0.1283514306.git.efault@gmx.de>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 5fe85be081edf0ac92d83f9c39e0ab5c1371eb82 upstream
+
+read_lock(&tasklist_lock) does not protect
+sys_sched_getscheduler() and sys_sched_getparam() against a
+concurrent update of the policy or scheduler parameters, as
+do_sched_setscheduler() does not take the tasklist_lock. The
+accessed integers can be retrieved without locking and are
+snapshots anyway.
+
+Using rcu_read_lock() to protect find_task_by_vpid() and prevent
+the task struct from going away does not change the above
+situation.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+LKML-Reference: <20091209100706.753790977@linutronix.de>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6505,7 +6505,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_
+               return -EINVAL;
+       retval = -ESRCH;
+-      read_lock(&tasklist_lock);
++      rcu_read_lock();
+       p = find_process_by_pid(pid);
+       if (p) {
+               retval = security_task_getscheduler(p);
+@@ -6513,7 +6513,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_
+                       retval = p->policy
+                               | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
+       }
+-      read_unlock(&tasklist_lock);
++      rcu_read_unlock();
+       return retval;
+ }
+@@ -6531,7 +6531,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, p
+       if (!param || pid < 0)
+               return -EINVAL;
+-      read_lock(&tasklist_lock);
++      rcu_read_lock();
+       p = find_process_by_pid(pid);
+       retval = -ESRCH;
+       if (!p)
+@@ -6542,7 +6542,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, p
+               goto out_unlock;
+       lp.sched_priority = p->rt_priority;
+-      read_unlock(&tasklist_lock);
++      rcu_read_unlock();
+       /*
+        * This one might sleep, we cannot do it with a spinlock held ...
+@@ -6552,7 +6552,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, p
+       return retval;
+ out_unlock:
+-      read_unlock(&tasklist_lock);
++      rcu_read_unlock();
+       return retval;
+ }
diff --git a/queue-2.6.32/sched-use-task_waking-for-fork-wakups.patch b/queue-2.6.32/sched-use-task_waking-for-fork-wakups.patch
new file mode 100644 (file)
index 0000000..df9bc15
--- /dev/null
@@ -0,0 +1,72 @@
+From a.p.zijlstra@chello.nl  Thu Sep 16 14:04:16 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Wed, 16 Dec 2009 18:04:35 +0100
+Subject: sched: Use TASK_WAKING for fork wakups
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <6745f030761d6cd4b79f13d12c5dee74d2e940fa.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 06b83b5fbea273672822b6ee93e16781046553ec upstream
+
+For later convenience, use TASK_WAKING for fresh tasks.
+
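+The resulting lifecycle of a fresh task (condensed from the hunks
+below) is:
+
+        sched_fork():           p->state = TASK_WAKING;  /* cannot be woken or run yet */
+        wake_up_new_task():     BUG_ON(p->state != TASK_WAKING);
+                                p->state = TASK_RUNNING;
+        init_idle():            idle->state = TASK_RUNNING;  /* idle tasks are set up here, not via wake_up_new_task() */
+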
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091216170517.732561278@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c |   18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2538,14 +2538,6 @@ static void __sched_fork(struct task_str
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+       INIT_HLIST_HEAD(&p->preempt_notifiers);
+ #endif
+-
+-      /*
+-       * We mark the process as running here, but have not actually
+-       * inserted it onto the runqueue yet. This guarantees that
+-       * nobody will actually run it, and a signal or other external
+-       * event cannot wake it up and insert it on the runqueue either.
+-       */
+-      p->state = TASK_RUNNING;
+ }
+ /*
+@@ -2556,6 +2548,12 @@ void sched_fork(struct task_struct *p, i
+       int cpu = get_cpu();
+       __sched_fork(p);
++      /*
++       * We mark the process as waking here. This guarantees that
++       * nobody will actually run it, and a signal or other external
++       * event cannot wake it up and insert it on the runqueue either.
++       */
++      p->state = TASK_WAKING;
+       /*
+        * Revert to default priority/policy on fork if requested.
+@@ -2624,7 +2622,8 @@ void wake_up_new_task(struct task_struct
+       struct rq *rq;
+       rq = task_rq_lock(p, &flags);
+-      BUG_ON(p->state != TASK_RUNNING);
++      BUG_ON(p->state != TASK_WAKING);
++      p->state = TASK_RUNNING;
+       update_rq_clock(rq);
+       activate_task(rq, p, 0);
+       trace_sched_wakeup_new(rq, p, 1);
+@@ -7034,6 +7033,7 @@ void __cpuinit init_idle(struct task_str
+       spin_lock_irqsave(&rq->lock, flags);
+       __sched_fork(idle);
++      idle->state = TASK_RUNNING;
+       idle->se.exec_start = sched_clock();
+       cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
index eed3181697b33c045555e533ee30addbb4539580..da0d77b7fe0a177852fc9f7645403616049603a2 100644 (file)
@@ -62,3 +62,26 @@ x86-tsc-fix-a-preemption-leak-in-restore_sched_clock_state.patch
 x86-64-compat-test-rax-for-the-syscall-number-not-eax.patch
 compat-make-compat_alloc_user_space-incorporate-the-access_ok.patch
 x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch
+sched-protect-task-cpus_allowed-access-in-sched_getaffinity.patch
+sched-protect-sched_rr_get_param-access-to-task-sched_class.patch
+sched-consolidate-select_task_rq-callers.patch
+sched-remove-unused-cpu_nr_migrations.patch
+sched-remove-rq-clock-coupling-from-set_task_cpu.patch
+sched-clean-up-ttwu-rq-locking.patch
+sched-sanitize-fork-handling.patch
+sched-remove-forced2_migrations-stats.patch
+sched-make-wakeup-side-and-atomic-variants-of-completion-api-irq-safe.patch
+sched-use-rcu-in-sys_sched_getscheduler-sys_sched_getparam.patch
+sched-use-rcu-in-sched_get-set_affinity.patch
+sched-use-rcu-in-sched_get_rr_param.patch
+sched-fix-set_cpu_active-in-cpu_down.patch
+sched-use-task_waking-for-fork-wakups.patch
+sched-ensure-set_task_cpu-is-never-called-on-blocked-tasks.patch
+sched-make-warning-less-noisy.patch
+sched-fix-broken-assertion.patch
+sched-fix-sched_exec-balancing.patch
+sched-fix-select_task_rq-vs-hotplug-issues.patch
+sched-add-pre-and-post-wakeup-hooks.patch
+sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch
+sched-fix-hotplug-hang.patch
+sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch