--- /dev/null
+From oleg@redhat.com Fri Sep 17 18:17:33 2010
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Mon, 15 Mar 2010 10:10:23 +0100
+Subject: sched: _cpu_down(): Don't play with current->cpus_allowed
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <6ee57a0da8d81973a62d3c1ce12c5c96e2634b04.1283514307.git.efault@gmx.de>
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 6a1bdc1b577ebcb65f6603c57f8347309bc4ab13 upstream
+
+_cpu_down() changes the current task's affinity and then restores it at
+the end. The problems are well known: we can't restore old_allowed if it
+was bound to the now-dead cpu, and we can race with userspace, which can
+change the cpu affinity during unplug.
+
+_cpu_down() should not play with current->cpus_allowed at all. Instead,
+take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
+removes the dying cpu from cpu_online_mask.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <20100315091023.GA9148@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h | 1 +
+ kernel/cpu.c | 18 ++++++------------
+ kernel/sched.c | 2 +-
+ 3 files changed, 8 insertions(+), 13 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1887,6 +1887,7 @@ extern void sched_clock_idle_sleep_event
+ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+ #ifdef CONFIG_HOTPLUG_CPU
++extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
+ extern void idle_task_exit(void);
+ #else
+ static inline void idle_task_exit(void) {}
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -163,6 +163,7 @@ static inline void check_for_tasks(int c
+ }
+
+ struct take_cpu_down_param {
++ struct task_struct *caller;
+ unsigned long mod;
+ void *hcpu;
+ };
+@@ -171,6 +172,7 @@ struct take_cpu_down_param {
+ static int __ref take_cpu_down(void *_param)
+ {
+ struct take_cpu_down_param *param = _param;
++ unsigned int cpu = (unsigned long)param->hcpu;
+ int err;
+
+ /* Ensure this CPU doesn't handle any more interrupts. */
+@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa
+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
+
++ if (task_cpu(param->caller) == cpu)
++ move_task_off_dead_cpu(cpu, param->caller);
+ /* Force idle task to run as soon as we yield: it should
+ immediately notice cpu is offline and die quickly. */
+ sched_idle_next();
+@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa
+ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ {
+ int err, nr_calls = 0;
+- cpumask_var_t old_allowed;
+ void *hcpu = (void *)(long)cpu;
+ unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct take_cpu_down_param tcd_param = {
++ .caller = current,
+ .mod = mod,
+ .hcpu = hcpu,
+ };
+@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int
+ if (!cpu_online(cpu))
+ return -EINVAL;
+
+- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
+- return -ENOMEM;
+-
+ cpu_hotplug_begin();
+ set_cpu_active(cpu, false);
+ err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
+@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int
+ goto out_release;
+ }
+
+- /* Ensure that we are not runnable on dying cpu */
+-	cpumask_copy(old_allowed, &current->cpus_allowed);
+- set_cpus_allowed_ptr(current, cpu_active_mask);
+-
+ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ if (err) {
+ set_cpu_active(cpu, true);
+@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int
+ hcpu) == NOTIFY_BAD)
+ BUG();
+
+- goto out_allowed;
++ goto out_release;
+ }
+ BUG_ON(cpu_online(cpu));
+
+@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int
+
+ check_for_tasks(cpu);
+
+-out_allowed:
+- set_cpus_allowed_ptr(current, old_allowed);
+ out_release:
+ cpu_hotplug_done();
+ if (!err) {
+@@ -263,7 +258,6 @@ out_release:
+ hcpu) == NOTIFY_BAD)
+ BUG();
+ }
+- free_cpumask_var(old_allowed);
+ return err;
+ }
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -7393,7 +7393,7 @@ static int migration_thread(void *data)
+ /*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ */
+-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
++void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+ {
+ struct rq *rq = cpu_rq(dead_cpu);
+ int needs_cpu, uninitialized_var(dest_cpu);
--- /dev/null
+From daniel.blueman@gmail.com Fri Sep 17 18:19:12 2010
+From: Daniel J Blueman <daniel.blueman@gmail.com>
+Date: Tue, 1 Jun 2010 14:06:13 +0100
+Subject: sched: apply RCU protection to wake_affine()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <a0298aef3a2239c68e4874964036fa4f2dd3a79c.1283514307.git.efault@gmx.de>
+
+From: Daniel J Blueman <daniel.blueman@gmail.com>
+
+commit f3b577dec1f2ce32d2db6d2ca6badff7002512af upstream
+
+The task_group() function returns a pointer that must be protected
+by either RCU, the ->alloc_lock, or the cgroup lock (see the
+rcu_dereference_check() in task_subsys_state(), which is invoked by
+task_group()). The wake_affine() function currently does none of these,
+which means that a concurrent update would be within its rights to free
+the structure returned by task_group(). Because wake_affine() uses this
+structure only to compute load-balancing heuristics, there is no reason
+to acquire either of the two locks.
+
+Therefore, this commit introduces an RCU read-side critical section that
+starts before the first call to task_group() and ends after the last use
+of the "tg" pointer returned from task_group(). Thanks to Li Zefan for
+pointing out the need to extend the RCU read-side critical section from
+that proposed by the original patch.
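+
+To illustrate the pattern outside the kernel, here is a minimal
+userspace sketch (illustration only, not part of this patch; it assumes
+liburcu's classic urcu.h flavour and all struct/variable names are
+invented). The rule it shows is the one applied here: every dereference
+of an RCU-protected pointer, and every use of data reached through it,
+must sit inside a read-side critical section:
+
+  #include <stdio.h>
+  #include <urcu.h>               /* userspace RCU; build with -lurcu */
+
+  struct group { long weight; };
+
+  /* Updated elsewhere with rcu_assign_pointer(); the old object may
+   * only be freed after synchronize_rcu() has returned. */
+  static struct group *current_group;
+
+  static long read_weight(void)
+  {
+          long w;
+
+          rcu_read_lock();        /* begin read-side critical section */
+          struct group *g = rcu_dereference(current_group);
+          w = g ? g->weight : 0;  /* last use of data reached via g   */
+          rcu_read_unlock();      /* only now may the updater free it */
+          return w;
+  }
+
+  int main(void)
+  {
+          static struct group g = { .weight = 1024 };
+
+          rcu_register_thread();  /* required by this urcu flavour */
+          rcu_assign_pointer(current_group, &g);
+          printf("weight = %ld\n", read_weight());
+          rcu_unregister_thread();
+          return 0;
+  }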
+
+Signed-off-by: Daniel J Blueman <daniel.blueman@gmail.com>
+Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched_fair.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1250,6 +1250,7 @@ static int wake_affine(struct sched_doma
+ * effect of the currently running task from the load
+ * of the current CPU:
+ */
++ rcu_read_lock();
+ if (sync) {
+ tg = task_group(current);
+ weight = current->se.load.weight;
+@@ -1275,6 +1276,7 @@ static int wake_affine(struct sched_doma
+ balanced = !this_load ||
+ 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
+ imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
++ rcu_read_unlock();
+
+ /*
+ * If the currently running task will sleep within
--- /dev/null
+From a.p.zijlstra@chello.nl Fri Sep 17 18:19:30 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu, 12 Nov 2009 15:55:28 +0100
+Subject: sched: Cleanup select_task_rq_fair()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <80263dd5bd5a2069a3907f0408ab2f73377f0b8a.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit a50bde5130f65733142b32975616427d0ea50856 upstream
+
+Clean up the new affine to idle sibling bits while trying to
+grok them. Should not have any functional differences.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091112145610.832503781@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched_fair.c | 73 ++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 51 insertions(+), 22 deletions(-)
+
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1384,6 +1384,41 @@ find_idlest_cpu(struct sched_group *grou
+ }
+
+ /*
++ * Try and locate an idle CPU in the sched_domain.
++ */
++static int
++select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
++{
++ int cpu = smp_processor_id();
++ int prev_cpu = task_cpu(p);
++ int i;
++
++ /*
++ * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
++ * test in select_task_rq_fair) and the prev_cpu is idle then that's
++ * always a better target than the current cpu.
++ */
++ if (target == cpu) {
++ if (!cpu_rq(prev_cpu)->cfs.nr_running)
++ target = prev_cpu;
++ }
++
++ /*
++ * Otherwise, iterate the domain and find an elegible idle cpu.
++ */
++ if (target == -1 || target == cpu) {
++ for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
++ if (!cpu_rq(i)->cfs.nr_running) {
++ target = i;
++ break;
++ }
++ }
++ }
++
++ return target;
++}
++
++/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+@@ -1441,36 +1476,30 @@ select_task_rq_fair(struct rq *rq, struc
+ }
+
+ if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) {
+- int candidate = -1, i;
++ int target = -1;
+
++ /*
++ * If both cpu and prev_cpu are part of this domain,
++ * cpu is a valid SD_WAKE_AFFINE target.
++ */
+ if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
+- candidate = cpu;
++ target = cpu;
+
+ /*
+- * Check for an idle shared cache.
++ * If there's an idle sibling in this domain, make that
++ * the wake_affine target instead of the current cpu.
++ *
++ * XXX: should we possibly do this outside of
++ * WAKE_AFFINE, in case the shared cache domain is
++ * smaller than the WAKE_AFFINE domain?
+ */
+- if (tmp->flags & SD_PREFER_SIBLING) {
+- if (candidate == cpu) {
+- if (!cpu_rq(prev_cpu)->cfs.nr_running)
+- candidate = prev_cpu;
+- }
+-
+- if (candidate == -1 || candidate == cpu) {
+- for_each_cpu(i, sched_domain_span(tmp)) {
+- if (!cpumask_test_cpu(i, &p->cpus_allowed))
+- continue;
+- if (!cpu_rq(i)->cfs.nr_running) {
+- candidate = i;
+- break;
+- }
+- }
+- }
+- }
++ if (tmp->flags & SD_PREFER_SIBLING)
++ target = select_idle_sibling(p, tmp, target);
+
+- if (candidate >= 0) {
++ if (target >= 0) {
+ affine_sd = tmp;
+ want_affine = 0;
+- cpu = candidate;
++ cpu = target;
+ }
+ }
+
--- /dev/null
+From anton@samba.org Fri Sep 17 18:20:49 2010
+From: Anton Blanchard <anton@samba.org>
+Date: Tue, 2 Feb 2010 14:46:13 -0800
+Subject: sched: cpuacct: Use bigger percpu counter batch values for stats counters
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <096b1867bf2f9b6a3fc6c4ed114a02c181d3d77e.1283514307.git.efault@gmx.de>
+
+From: Anton Blanchard <anton@samba.org>
+
+commit fa535a77bd3fa32b9215ba375d6a202fe73e1dd6 upstream
+
+When CONFIG_VIRT_CPU_ACCOUNTING and CONFIG_CGROUP_CPUACCT are
+enabled we can call cpuacct_update_stats with values much larger
+than percpu_counter_batch. This means the call to
+percpu_counter_add will always add to the global count which is
+protected by a spinlock and we end up with a global spinlock in
+the scheduler.
+
+Based on an idea by KOSAKI Motohiro, this patch scales the batch
+value by cputime_one_jiffy such that we have the same batch
+limit as we would if CONFIG_VIRT_CPU_ACCOUNTING was disabled.
+His patch did this once at boot, but that initialisation happened
+too early on PowerPC (before time_init()) and the value was never
+updated at runtime after a cpu hotplug add/remove.
+
+This patch instead scales percpu_counter_batch by
+cputime_one_jiffy at runtime, which keeps the batch correct even
+after cpu hotplug operations. We cap it at INT_MAX in case of
+overflow.
+
+For architectures that do not support
+CONFIG_VIRT_CPU_ACCOUNTING, cputime_one_jiffy is the constant 1
+and gcc is smart enough to optimise min(s32
+percpu_counter_batch, INT_MAX) to just percpu_counter_batch at
+least on x86 and PowerPC. So there is no need to add an #ifdef.
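+
+As an illustration only (this is not kernel code, and the constants are
+invented), the standalone sketch below mimics the batching done by
+__percpu_counter_add(): updates stay in a per-cpu delta and only fold
+into the lock-protected global count once the delta exceeds the batch,
+so a batch scaled by cputime_one_jiffy (and capped at INT_MAX) takes
+the lock about as rarely as the unscaled batch does without
+CONFIG_VIRT_CPU_ACCOUNTING:
+
+  #include <limits.h>
+  #include <stdio.h>
+  #include <stdlib.h>
+
+  /* Hypothetical values: batch of 32, one jiffy = 10000000 cputime units. */
+  static const long percpu_counter_batch = 32;
+  static const long cputime_one_jiffy = 10000000;
+
+  static long global_count;       /* stands in for the spinlock-protected count */
+  static long percpu_delta;       /* stands in for this cpu's counter slot      */
+  static long lock_acquisitions;
+
+  static void counter_add(long amount, long batch)
+  {
+          percpu_delta += amount;
+          if (labs(percpu_delta) >= batch) {
+                  lock_acquisitions++;            /* would take fbc->lock here */
+                  global_count += percpu_delta;
+                  percpu_delta = 0;
+          }
+  }
+
+  int main(void)
+  {
+          long batch = percpu_counter_batch * cputime_one_jiffy;
+
+          if (batch > INT_MAX)    /* the min_t(..., INT_MAX) cap */
+                  batch = INT_MAX;
+
+          /* Charge one jiffy of cputime 1000 times, as cpuacct would. */
+          for (int i = 0; i < 1000; i++)
+                  counter_add(cputime_one_jiffy, batch);
+
+          printf("scaled batch %ld, lock acquisitions %ld (of 1000 updates)\n",
+                 batch, lock_acquisitions);
+          return 0;
+  }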
+
+On a 64 thread PowerPC box with CONFIG_VIRT_CPU_ACCOUNTING and
+CONFIG_CGROUP_CPUACCT enabled, a context switch microbenchmark
+is 234x faster and almost matches a CONFIG_CGROUP_CPUACCT
+disabled kernel:
+
+ CONFIG_CGROUP_CPUACCT disabled: 16906698 ctx switches/sec
+ CONFIG_CGROUP_CPUACCT enabled: 61720 ctx switches/sec
+ CONFIG_CGROUP_CPUACCT + patch: 16663217 ctx switches/sec
+
+Tested with:
+
+ wget http://ozlabs.org/~anton/junkcode/context_switch.c
+ make context_switch
+ for i in `seq 0 63`; do taskset -c $i ./context_switch & done
+ vmstat 1
+
+Signed-off-by: Anton Blanchard <anton@samba.org>
+Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
+Tested-by: Balbir Singh <balbir@linux.vnet.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Cc: "Luck, Tony" <tony.luck@intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c | 20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -10934,12 +10934,30 @@ static void cpuacct_charge(struct task_s
+ }
+
+ /*
++ * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
++ * in cputime_t units. As a result, cpuacct_update_stats calls
++ * percpu_counter_add with values large enough to always overflow the
++ * per cpu batch limit causing bad SMP scalability.
++ *
++ * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
++ * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
++ * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
++ */
++#ifdef CONFIG_SMP
++#define CPUACCT_BATCH \
++ min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
++#else
++#define CPUACCT_BATCH 0
++#endif
++
++/*
+ * Charge the system/user time to the task's accounting group.
+ */
+ static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val)
+ {
+ struct cpuacct *ca;
++ int batch = CPUACCT_BATCH;
+
+ if (unlikely(!cpuacct_subsys.active))
+ return;
+@@ -10948,7 +10966,7 @@ static void cpuacct_update_stats(struct
+ ca = task_ca(tsk);
+
+ do {
+- percpu_counter_add(&ca->cpustat[idx], val);
++ __percpu_counter_add(&ca->cpustat[idx], val, batch);
+ ca = ca->parent;
+ } while (ca);
+ rcu_read_unlock();
--- /dev/null
+From tglx@linutronix.de Fri Sep 17 18:13:56 2010
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Jan 2010 20:58:57 +0000
+Subject: sched: Extend enqueue_task to allow head queueing
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <e3b3be0a0a3a5c31d5e9f4243f9170302b0de6e5.1283514307.git.efault@gmx.de>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit ea87bb7853168434f4a82426dd1ea8421f9e604d upstream
+
+The ability to enqueue a task at the head of a SCHED_FIFO priority
+list is required to fix some violations of the POSIX scheduling policy.
+
+Extend the related functions with a "head" argument.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Tested-by: Carsten Emde <cbe@osadl.org>
+Tested-by: Mathias Weber <mathias.weber.mw1@roche.com>
+LKML-Reference: <20100120171629.734886007@linutronix.de>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h | 3 ++-
+ kernel/sched.c | 13 +++++++------
+ kernel/sched_fair.c | 3 ++-
+ kernel/sched_rt.c | 3 ++-
+ 4 files changed, 13 insertions(+), 9 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1071,7 +1071,8 @@ struct sched_domain;
+ struct sched_class {
+ const struct sched_class *next;
+
+- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
++ bool head);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+ void (*yield_task) (struct rq *rq);
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -1903,13 +1903,14 @@ static void update_avg(u64 *avg, u64 sam
+ *avg += diff >> 3;
+ }
+
+-static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ if (wakeup)
+ p->se.start_runtime = p->se.sum_exec_runtime;
+
+ sched_info_queued(p);
+- p->sched_class->enqueue_task(rq, p, wakeup);
++ p->sched_class->enqueue_task(rq, p, wakeup, head);
+ p->se.on_rq = 1;
+ }
+
+@@ -1985,7 +1986,7 @@ static void activate_task(struct rq *rq,
+ if (task_contributes_to_load(p))
+ rq->nr_uninterruptible--;
+
+- enqueue_task(rq, p, wakeup);
++ enqueue_task(rq, p, wakeup, false);
+ inc_nr_running(rq);
+ }
+
+@@ -6183,7 +6184,7 @@ void rt_mutex_setprio(struct task_struct
+ if (running)
+ p->sched_class->set_curr_task(rq);
+ if (on_rq) {
+- enqueue_task(rq, p, 0);
++ enqueue_task(rq, p, 0, false);
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
+ }
+@@ -6227,7 +6228,7 @@ void set_user_nice(struct task_struct *p
+ delta = p->prio - old_prio;
+
+ if (on_rq) {
+- enqueue_task(rq, p, 0);
++ enqueue_task(rq, p, 0, false);
+ /*
+ * If the task increased its priority or is running and
+ * lowered its priority, then reschedule its CPU:
+@@ -10180,7 +10181,7 @@ void sched_move_task(struct task_struct
+ if (unlikely(running))
+ tsk->sched_class->set_curr_task(rq);
+ if (on_rq)
+- enqueue_task(rq, tsk, 0);
++ enqueue_task(rq, tsk, 0, false);
+
+ task_rq_unlock(rq, &flags);
+ }
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1031,7 +1031,8 @@ static inline void hrtick_update(struct
+ * increased. Here we update the fair scheduling stats and
+ * then put the task into the rbtree:
+ */
+-static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -878,7 +878,8 @@ static void dequeue_rt_entity(struct sch
+ /*
+ * Adding/removing a task to/from a priority array:
+ */
+-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ struct sched_rt_entity *rt_se = &p->rt;
+
--- /dev/null
+From peterz@infradead.org Fri Sep 17 18:13:26 2010
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 21 Jan 2010 16:34:27 +0100
+Subject: sched: Fix incorrect sanity check
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <550df2da0c2d00162a463923644fd024de95b890.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 11854247e2c851e7ff9ce138e501c6cffc5a4217 upstream
+
+We moved to migrate on wakeup, which means that sleeping tasks could
+still be present on offline cpus. Amend the check to only test running
+tasks.
+
+Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/cpu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -151,7 +151,7 @@ static inline void check_for_tasks(int c
+
+ write_lock_irq(&tasklist_lock);
+ for_each_process(p) {
+- if (task_cpu(p) == cpu &&
++ if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
+ (!cputime_eq(p->utime, cputime_zero) ||
+ !cputime_eq(p->stime, cputime_zero)))
+ printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
--- /dev/null
+From a.p.zijlstra@chello.nl Fri Sep 17 18:18:32 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 26 Mar 2010 12:22:14 +0100
+Subject: sched: Fix nr_uninterruptible count
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <c1b37a706324879a325f2ec268f2dc1b9958060c.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit cc87f76a601d2d256118f7bab15e35254356ae21 upstream
+
+The cpuload calculation in calc_load_account_active() assumes
+rq->nr_uninterruptible will not change on an offline cpu after
+migrate_nr_uninterruptible(). However, the recent migrate-on-wakeup
+changes broke that and would result in decrementing the offline cpu's
+rq->nr_uninterruptible.
+
+Fix this by accounting the nr_uninterruptible on the waking cpu.
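+
+A toy model of the accounting (the cpu count and values below are
+invented) shows why charging the decrement to the waking cpu keeps the
+global sum correct, even though the matching increment happened on a
+cpu that has since gone offline:
+
+  #include <stdio.h>
+
+  #define NR_CPUS 4
+
+  static long nr_uninterruptible[NR_CPUS];
+  static const int online[NR_CPUS] = { 1, 1, 1, 0 };  /* cpu 3 unplugged */
+
+  /* Like calc_load_account_active(): only online cpus contribute. */
+  static long global_uninterruptible(void)
+  {
+          long sum = 0;
+
+          for (int i = 0; i < NR_CPUS; i++)
+                  if (online[i])
+                          sum += nr_uninterruptible[i];
+          return sum;
+  }
+
+  int main(void)
+  {
+          /* migrate_nr_uninterruptible() moved the sleeper's count from
+           * dying cpu 3 over to cpu 0 at unplug time. */
+          nr_uninterruptible[0] = 1;
+          printf("before wakeup:           %ld\n", global_uninterruptible());
+
+          nr_uninterruptible[3]--;  /* broken: offline orig cpu, sum stays 1 */
+          printf("decrement offline cpu 3: %ld\n", global_uninterruptible());
+          nr_uninterruptible[3]++;  /* undo, then do it the fixed way */
+
+          nr_uninterruptible[1]--;  /* fixed: account on the waking cpu */
+          printf("decrement waking cpu 1:  %ld\n", global_uninterruptible());
+          return 0;
+  }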
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2435,8 +2435,12 @@ static int try_to_wake_up(struct task_st
+ *
+ * First fix up the nr_uninterruptible count:
+ */
+- if (task_contributes_to_load(p))
+- rq->nr_uninterruptible--;
++ if (task_contributes_to_load(p)) {
++ if (likely(cpu_online(orig_cpu)))
++ rq->nr_uninterruptible--;
++ else
++ this_rq()->nr_uninterruptible--;
++ }
+ p->state = TASK_WAKING;
+
+ if (p->sched_class->task_waking)
--- /dev/null
+From a.p.zijlstra@chello.nl Fri Sep 17 18:13:39 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Mon, 15 Feb 2010 14:45:54 +0100
+Subject: sched: Fix race between ttwu() and task_rq_lock()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <80faa6f269f4bd7825aec22056bbca743b5bd100.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 0970d2992dfd7d5ec2c787417cf464f01eeaf42a upstream
+
+Thomas found that due to ttwu() changing a task's cpu without holding
+the rq->lock, task_rq_lock() might end up locking the wrong rq.
+
+Avoid this by serializing against TASK_WAKING.
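+
+The resulting locking pattern is easier to see in a small userspace
+model (illustration only; the struct and function names are invented,
+the 'waking' flag stands in for TASK_WAKING, and sched_yield() stands
+in for cpu_relax()). Lock the queue the task appears to be on, then
+recheck, and do not trust task->rq while the waking flag is set:
+
+  #include <pthread.h>            /* build with -pthread */
+  #include <sched.h>
+  #include <stdatomic.h>
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  struct rq { pthread_mutex_t lock; int nr_running; };
+
+  struct task {
+          _Atomic(struct rq *) rq;
+          atomic_bool waking;
+  };
+
+  static struct rq *task_rq_lock(struct task *t)
+  {
+          for (;;) {
+                  while (atomic_load(&t->waking))  /* serialize against waker */
+                          sched_yield();
+
+                  struct rq *rq = atomic_load(&t->rq);
+                  pthread_mutex_lock(&rq->lock);
+                  if (rq == atomic_load(&t->rq) && !atomic_load(&t->waking))
+                          return rq;               /* locked the right queue  */
+                  pthread_mutex_unlock(&rq->lock); /* raced with migration    */
+          }
+  }
+
+  int main(void)
+  {
+          static struct rq rq0 = { .lock = PTHREAD_MUTEX_INITIALIZER };
+          struct task t = { .rq = &rq0, .waking = false };
+
+          struct rq *rq = task_rq_lock(&t);
+          rq->nr_running++;
+          pthread_mutex_unlock(&rq->lock);
+          printf("nr_running = %d\n", rq0.nr_running);
+          return 0;
+  }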
+
+Reported-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <1266241712.15770.420.camel@laptop>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c | 71 ++++++++++++++++++++++++++++++++++++---------------------
+ 1 file changed, 45 insertions(+), 26 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -942,16 +942,33 @@ static inline void finish_lock_switch(st
+ #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+ /*
++ * Check whether the task is waking, we use this to synchronize against
++ * ttwu() so that task_cpu() reports a stable number.
++ *
++ * We need to make an exception for PF_STARTING tasks because the fork
++ * path might require task_rq_lock() to work, eg. it can call
++ * set_cpus_allowed_ptr() from the cpuset clone_ns code.
++ */
++static inline int task_is_waking(struct task_struct *p)
++{
++ return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
++}
++
++/*
+ * __task_rq_lock - lock the runqueue a given task resides on.
+ * Must be called interrupts disabled.
+ */
+ static inline struct rq *__task_rq_lock(struct task_struct *p)
+ __acquires(rq->lock)
+ {
++ struct rq *rq;
++
+ for (;;) {
+- struct rq *rq = task_rq(p);
++ while (task_is_waking(p))
++ cpu_relax();
++ rq = task_rq(p);
+ spin_lock(&rq->lock);
+- if (likely(rq == task_rq(p)))
++ if (likely(rq == task_rq(p) && !task_is_waking(p)))
+ return rq;
+ spin_unlock(&rq->lock);
+ }
+@@ -968,10 +985,12 @@ static struct rq *task_rq_lock(struct ta
+ struct rq *rq;
+
+ for (;;) {
++ while (task_is_waking(p))
++ cpu_relax();
+ local_irq_save(*flags);
+ rq = task_rq(p);
+ spin_lock(&rq->lock);
+- if (likely(rq == task_rq(p)))
++ if (likely(rq == task_rq(p) && !task_is_waking(p)))
+ return rq;
+ spin_unlock_irqrestore(&rq->lock, *flags);
+ }
+@@ -2439,14 +2458,27 @@ static int try_to_wake_up(struct task_st
+ __task_rq_unlock(rq);
+
+ cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+- if (cpu != orig_cpu)
++ if (cpu != orig_cpu) {
++ /*
++ * Since we migrate the task without holding any rq->lock,
++ * we need to be careful with task_rq_lock(), since that
++ * might end up locking an invalid rq.
++ */
+ set_task_cpu(p, cpu);
++ }
+
+- rq = __task_rq_lock(p);
++ rq = cpu_rq(cpu);
++ spin_lock(&rq->lock);
+ update_rq_clock(rq);
+
++ /*
++ * We migrated the task without holding either rq->lock, however
++ * since the task is not on the task list itself, nobody else
++ * will try and migrate the task, hence the rq should match the
++ * cpu we just moved it to.
++ */
++ WARN_ON(task_cpu(p) != cpu);
+ WARN_ON(p->state != TASK_WAKING);
+- cpu = task_cpu(p);
+
+ #ifdef CONFIG_SCHEDSTATS
+ schedstat_inc(rq, ttwu_count);
+@@ -2695,7 +2727,13 @@ void wake_up_new_task(struct task_struct
+ set_task_cpu(p, cpu);
+ #endif
+
+- rq = task_rq_lock(p, &flags);
++ /*
++ * Since the task is not on the rq and we still have TASK_WAKING set
++ * nobody else will migrate this task.
++ */
++ rq = cpu_rq(cpu);
++ spin_lock_irqsave(&rq->lock, flags);
++
+ BUG_ON(p->state != TASK_WAKING);
+ p->state = TASK_RUNNING;
+ update_rq_clock(rq);
+@@ -7204,27 +7242,8 @@ int set_cpus_allowed_ptr(struct task_str
+ struct rq *rq;
+ int ret = 0;
+
+- /*
+- * Since we rely on wake-ups to migrate sleeping tasks, don't change
+- * the ->cpus_allowed mask from under waking tasks, which would be
+- * possible when we change rq->lock in ttwu(), so synchronize against
+- * TASK_WAKING to avoid that.
+- *
+- * Make an exception for freshly cloned tasks, since cpuset namespaces
+- * might move the task about, we have to validate the target in
+- * wake_up_new_task() anyway since the cpu might have gone away.
+- */
+-again:
+- while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
+- cpu_relax();
+-
+ rq = task_rq_lock(p, &flags);
+
+- if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
+- task_rq_unlock(rq, &flags);
+- goto again;
+- }
+-
+ if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+ ret = -EINVAL;
+ goto out;
--- /dev/null
+From peterz@infradead.org Fri Sep 17 18:18:47 2010
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Thu, 19 Aug 2010 13:31:43 +0200
+Subject: sched: Fix rq->clock synchronization when migrating tasks
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <748cfa7664c3c3092de1cf8c86f96474f840bed6.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 861d034ee814917a83bd5de4b26e3b8336ddeeb8 upstream
+
+sched_fork() -- we do task placement in ->task_fork_fair(), so ensure we
+ update_rq_clock() there and work with the current time. We leave the
+ vruntime in relative state, so the time delay until wake_up_new_task()
+ doesn't matter.
+
+wake_up_new_task() -- since task_fork_fair() left p->vruntime in
+ relative state, we can safely migrate; the activate_task() on the
+ remote rq will call update_rq_clock() and cause the clock to be
+ synced (enough).
+
+Tested-by: Jack Daniel <wanders.thirst@gmail.com>
+Tested-by: Philby John <pjohn@mvista.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <1281002322.1923.1708.camel@laptop>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched_fair.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1963,6 +1963,8 @@ static void task_fork_fair(struct task_s
+
+ spin_lock_irqsave(&rq->lock, flags);
+
++ update_rq_clock(rq);
++
+ if (unlikely(task_cpu(p) != this_cpu))
+ __set_task_cpu(p, this_cpu);
+
--- /dev/null
+From suresh.b.siddha@intel.com Fri Sep 17 18:20:36 2010
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+Date: Wed, 31 Mar 2010 16:47:45 -0700
+Subject: sched: Fix select_idle_sibling() logic in select_task_rq_fair()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <7c9917f68be3e57e65b938ff15cc6a2b1cc0da16.1283514307.git.efault@gmx.de>
+
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+
+commit 99bd5e2f245d8cd17d040c82d40becdb3efd9b69 upstream
+
+Issues in the current select_idle_sibling() logic in select_task_rq_fair()
+in the context of a task wake-up:
+
+a) Once we select the idle sibling, we use that domain (spanning the cpu
+   the task is being woken up on and the idle sibling we found) in our
+   wake_affine() decisions. This domain is completely different from the
+   domain we are supposed to use: the one spanning the cpu the task is
+   being woken up on and the cpu where the task previously ran.
+
+b) We do the select_idle_sibling() check only for the cpu the task is
+   being woken up on. If select_task_rq_fair() selects the previously-run
+   cpu for waking the task, doing a select_idle_sibling() check for that
+   cpu would also help, and we don't do this currently.
+
+c) In scenarios where the cpu the task is being woken up on is busy but
+   its HT siblings are idle, we select the idle HT sibling for the wakeup
+   instead of the core the task previously ran on, which is currently
+   completely idle. That is, we are not taking decisions based on
+   wake_affine() but directly selecting an idle sibling, which can cause
+   an imbalance at the SMT/MC level that is only corrected later by the
+   periodic load balancer.
+
+Fix this by first going through the load-imbalance calculation in
+wake_affine(), and only once we have decided between the waking cpu and
+the previously-run cpu, choose a possible idle sibling of that cpu to
+wake the task up on.
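+
+A toy model of the reordered decision (the cpu numbers, the idle[] map
+and the xor-based sibling relation are all invented, and wake_affine()
+is reduced to a boolean) may help to illustrate it: decide between the
+waking cpu and the previous cpu first, then look for an idle sibling of
+whichever cpu won:
+
+  #include <stdbool.h>
+  #include <stdio.h>
+
+  #define NR_CPUS 8
+
+  static bool idle[NR_CPUS];
+
+  /* Pretend each cpu shares a cache only with cpu^1 (its HT sibling). */
+  static int select_idle_sibling(int target)
+  {
+          if (idle[target])
+                  return target;
+          if (idle[target ^ 1])
+                  return target ^ 1;
+          return target;
+  }
+
+  static int select_target(int cpu, int prev_cpu, bool wake_affine_wins)
+  {
+          int target = wake_affine_wins ? cpu : prev_cpu;
+
+          return select_idle_sibling(target);
+  }
+
+  int main(void)
+  {
+          idle[3] = true;         /* HT sibling of waking cpu 2 is idle */
+          idle[5] = true;         /* prev_cpu 5 is completely idle      */
+
+          /* Only if wake_affine() prefers the waking cpu do we consider
+           * its idle sibling; otherwise we stay near the previous cpu. */
+          printf("affine wakeup     -> cpu %d\n", select_target(2, 5, true));
+          printf("non-affine wakeup -> cpu %d\n", select_target(2, 5, false));
+          return 0;
+  }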
+
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <1270079265.7835.8.camel@sbs-t61.sc.intel.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched_fair.c | 82 +++++++++++++++++++++++++---------------------------
+ 1 file changed, 40 insertions(+), 42 deletions(-)
+
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1386,29 +1386,48 @@ find_idlest_cpu(struct sched_group *grou
+ /*
+ * Try and locate an idle CPU in the sched_domain.
+ */
+-static int
+-select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
++static int select_idle_sibling(struct task_struct *p, int target)
+ {
+ int cpu = smp_processor_id();
+ int prev_cpu = task_cpu(p);
++ struct sched_domain *sd;
+ int i;
+
+ /*
+- * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE
+- * test in select_task_rq_fair) and the prev_cpu is idle then that's
+- * always a better target than the current cpu.
++ * If the task is going to be woken-up on this cpu and if it is
++ * already idle, then it is the right target.
++ */
++ if (target == cpu && idle_cpu(cpu))
++ return cpu;
++
++ /*
++ * If the task is going to be woken-up on the cpu where it previously
++ * ran and if it is currently idle, then it the right target.
+ */
+- if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
++ if (target == prev_cpu && idle_cpu(prev_cpu))
+ return prev_cpu;
+
+ /*
+- * Otherwise, iterate the domain and find an elegible idle cpu.
++ * Otherwise, iterate the domains and find an elegible idle cpu.
+ */
+- for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+- if (!cpu_rq(i)->cfs.nr_running) {
+- target = i;
++ for_each_domain(target, sd) {
++ if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
+ break;
++
++ for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
++ if (idle_cpu(i)) {
++ target = i;
++ break;
++ }
+ }
++
++ /*
++ * Lets stop looking for an idle sibling when we reached
++ * the domain that spans the current cpu and prev_cpu.
++ */
++ if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
++ cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
++ break;
+ }
+
+ return target;
+@@ -1432,7 +1451,7 @@ select_task_rq_fair(struct rq *rq, struc
+ int cpu = smp_processor_id();
+ int prev_cpu = task_cpu(p);
+ int new_cpu = cpu;
+- int want_affine = 0, cpu_idle = !current->pid;
++ int want_affine = 0;
+ int want_sd = 1;
+ int sync = wake_flags & WF_SYNC;
+
+@@ -1472,36 +1491,13 @@ select_task_rq_fair(struct rq *rq, struc
+ }
+
+ /*
+- * While iterating the domains looking for a spanning
+- * WAKE_AFFINE domain, adjust the affine target to any idle cpu
+- * in cache sharing domains along the way.
++ * If both cpu and prev_cpu are part of this domain,
++ * cpu is a valid SD_WAKE_AFFINE target.
+ */
+- if (want_affine) {
+- int target = -1;
+-
+- /*
+- * If both cpu and prev_cpu are part of this domain,
+- * cpu is a valid SD_WAKE_AFFINE target.
+- */
+- if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
+- target = cpu;
+-
+- /*
+- * If there's an idle sibling in this domain, make that
+- * the wake_affine target instead of the current cpu.
+- */
+- if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES)
+- target = select_idle_sibling(p, tmp, target);
+-
+- if (target >= 0) {
+- if (tmp->flags & SD_WAKE_AFFINE) {
+- affine_sd = tmp;
+- want_affine = 0;
+- if (target != cpu)
+- cpu_idle = 1;
+- }
+- cpu = target;
+- }
++ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
++ cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
++ affine_sd = tmp;
++ want_affine = 0;
+ }
+
+ if (!want_sd && !want_affine)
+@@ -1532,8 +1528,10 @@ select_task_rq_fair(struct rq *rq, struc
+ #endif
+
+ if (affine_sd) {
+- if (cpu_idle || cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+- return cpu;
++ if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
++ return select_idle_sibling(p, cpu);
++ else
++ return select_idle_sibling(p, prev_cpu);
+ }
+
+ while (sd) {
--- /dev/null
+From efault@gmx.de Fri Sep 17 18:20:11 2010
+From: Mike Galbraith <efault@gmx.de>
+Date: Thu, 11 Mar 2010 17:17:16 +0100
+Subject: sched: Fix select_idle_sibling()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <2dc48f18ab671dc1c87c87dba674ff4b755d17ff.1283514307.git.efault@gmx.de>
+
+From: Mike Galbraith <efault@gmx.de>
+
+commit 8b911acdf08477c059d1c36c21113ab1696c612b upstream
+
+Don't bother with selection when the current cpu is idle. Recent load
+balancing changes also make it no longer necessary to check wake_affine()
+success before returning the selected sibling, so we now always use it.
+
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <1268301369.6785.36.camel@marge.simson.net>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched_fair.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1432,7 +1432,7 @@ select_task_rq_fair(struct rq *rq, struc
+ int cpu = smp_processor_id();
+ int prev_cpu = task_cpu(p);
+ int new_cpu = cpu;
+- int want_affine = 0;
++ int want_affine = 0, cpu_idle = !current->pid;
+ int want_sd = 1;
+ int sync = wake_flags & WF_SYNC;
+
+@@ -1490,13 +1490,15 @@ select_task_rq_fair(struct rq *rq, struc
+ * If there's an idle sibling in this domain, make that
+ * the wake_affine target instead of the current cpu.
+ */
+- if (tmp->flags & SD_SHARE_PKG_RESOURCES)
++ if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES)
+ target = select_idle_sibling(p, tmp, target);
+
+ if (target >= 0) {
+ if (tmp->flags & SD_WAKE_AFFINE) {
+ affine_sd = tmp;
+ want_affine = 0;
++ if (target != cpu)
++ cpu_idle = 1;
+ }
+ cpu = target;
+ }
+@@ -1512,6 +1514,7 @@ select_task_rq_fair(struct rq *rq, struc
+ sd = tmp;
+ }
+
++#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (sched_feat(LB_SHARES_UPDATE)) {
+ /*
+ * Pick the largest domain to update shares over
+@@ -1528,9 +1531,12 @@ select_task_rq_fair(struct rq *rq, struc
+ spin_lock(&rq->lock);
+ }
+ }
++#endif
+
+- if (affine_sd && wake_affine(affine_sd, p, sync))
+- return cpu;
++ if (affine_sd) {
++ if (cpu_idle || cpu == prev_cpu || wake_affine(affine_sd, p, sync))
++ return cpu;
++ }
+
+ while (sd) {
+ int load_idx = sd->forkexec_idx;
--- /dev/null
+From a.p.zijlstra@chello.nl Fri Sep 17 18:18:02 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Wed, 24 Mar 2010 18:34:10 +0100
+Subject: sched: Fix TASK_WAKING vs fork deadlock
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <1620f28b03b31be9190132c280a85fc1d08141a8.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 0017d735092844118bef006696a750a0e4ef6ebd upstream
+
+Oleg noticed a few races with the TASK_WAKING usage on fork.
+
+ - since TASK_WAKING is basically a spinlock, it should be IRQ safe
+ - since we set TASK_WAKING (*) without holding rq->lock, it could be
+   that there is still an rq->lock holder, so we are not actually
+   providing full serialization.
+
+(*) in fact we clear PF_STARTING, which in effect enables TASK_WAKING.
+
+Cure the second issue by not setting TASK_WAKING in sched_fork(), but
+only temporarily in wake_up_new_task() while calling select_task_rq().
+
+Cure the first by holding rq->lock around the select_task_rq() call;
+this will disable IRQs. This, however, requires that we push the
+rq->lock release down into select_task_rq_fair()'s cgroup stuff.
+
+Because select_task_rq_fair() still needs to drop the rq->lock we
+cannot fully get rid of TASK_WAKING.
+
+Reported-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h | 3 +-
+ kernel/sched.c | 65 +++++++++++++++++-------------------------------
+ kernel/sched_fair.c | 8 ++++-
+ kernel/sched_idletask.c | 3 +-
+ kernel/sched_rt.c | 5 +--
+ 5 files changed, 36 insertions(+), 48 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1082,7 +1082,8 @@ struct sched_class {
+ void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+
+ #ifdef CONFIG_SMP
+- int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
++ int (*select_task_rq)(struct rq *rq, struct task_struct *p,
++ int sd_flag, int flags);
+
+ unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest, unsigned long max_load_move,
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -944,14 +944,10 @@ static inline void finish_lock_switch(st
+ /*
+ * Check whether the task is waking, we use this to synchronize against
+ * ttwu() so that task_cpu() reports a stable number.
+- *
+- * We need to make an exception for PF_STARTING tasks because the fork
+- * path might require task_rq_lock() to work, eg. it can call
+- * set_cpus_allowed_ptr() from the cpuset clone_ns code.
+ */
+ static inline int task_is_waking(struct task_struct *p)
+ {
+- return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
++ return unlikely(p->state == TASK_WAKING);
+ }
+
+ /*
+@@ -2373,9 +2369,9 @@ static int select_fallback_rq(int cpu, s
+ * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
+ */
+ static inline
+-int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
++int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
+ {
+- int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
++ int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
+
+ /*
+ * In order not to call set_task_cpu() on a blocking task we need
+@@ -2450,17 +2446,10 @@ static int try_to_wake_up(struct task_st
+ if (p->sched_class->task_waking)
+ p->sched_class->task_waking(rq, p);
+
+- __task_rq_unlock(rq);
+-
+- cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+- if (cpu != orig_cpu) {
+- /*
+- * Since we migrate the task without holding any rq->lock,
+- * we need to be careful with task_rq_lock(), since that
+- * might end up locking an invalid rq.
+- */
++ cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
++ if (cpu != orig_cpu)
+ set_task_cpu(p, cpu);
+- }
++ __task_rq_unlock(rq);
+
+ rq = cpu_rq(cpu);
+ spin_lock(&rq->lock);
+@@ -2638,11 +2627,11 @@ void sched_fork(struct task_struct *p, i
+
+ __sched_fork(p);
+ /*
+- * We mark the process as waking here. This guarantees that
++ * We mark the process as running here. This guarantees that
+ * nobody will actually run it, and a signal or other external
+ * event cannot wake it up and insert it on the runqueue either.
+ */
+- p->state = TASK_WAKING;
++ p->state = TASK_RUNNING;
+
+ /*
+ * Revert to default priority/policy on fork if requested.
+@@ -2709,28 +2698,25 @@ void wake_up_new_task(struct task_struct
+ int cpu = get_cpu();
+
+ #ifdef CONFIG_SMP
++ rq = task_rq_lock(p, &flags);
++ p->state = TASK_WAKING;
++
+ /*
+ * Fork balancing, do it here and not earlier because:
+ * - cpus_allowed can change in the fork path
+ * - any previously selected cpu might disappear through hotplug
+ *
+- * We still have TASK_WAKING but PF_STARTING is gone now, meaning
+- * ->cpus_allowed is stable, we have preemption disabled, meaning
+- * cpu_online_mask is stable.
++ * We set TASK_WAKING so that select_task_rq() can drop rq->lock
++ * without people poking at ->cpus_allowed.
+ */
+- cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
++ cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
+ set_task_cpu(p, cpu);
+-#endif
+-
+- /*
+- * Since the task is not on the rq and we still have TASK_WAKING set
+- * nobody else will migrate this task.
+- */
+- rq = cpu_rq(cpu);
+- spin_lock_irqsave(&rq->lock, flags);
+
+- BUG_ON(p->state != TASK_WAKING);
+ p->state = TASK_RUNNING;
++ task_rq_unlock(rq, &flags);
++#endif
++
++ rq = task_rq_lock(p, &flags);
+ update_rq_clock(rq);
+ activate_task(rq, p, 0);
+ trace_sched_wakeup_new(rq, p, 1);
+@@ -3215,19 +3201,15 @@ void sched_exec(void)
+ {
+ struct task_struct *p = current;
+ struct migration_req req;
+- int dest_cpu, this_cpu;
+ unsigned long flags;
+ struct rq *rq;
+-
+- this_cpu = get_cpu();
+- dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
+- if (dest_cpu == this_cpu) {
+- put_cpu();
+- return;
+- }
++ int dest_cpu;
+
+ rq = task_rq_lock(p, &flags);
+- put_cpu();
++ dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
++ if (dest_cpu == smp_processor_id())
++ goto unlock;
++
+ /*
+ * select_task_rq() can race against ->cpus_allowed
+ */
+@@ -3245,6 +3227,7 @@ void sched_exec(void)
+
+ return;
+ }
++unlock:
+ task_rq_unlock(rq, &flags);
+ }
+
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1392,7 +1392,8 @@ find_idlest_cpu(struct sched_group *grou
+ *
+ * preempt must be disabled.
+ */
+-static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
++static int
++select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
+ {
+ struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+ int cpu = smp_processor_id();
+@@ -1492,8 +1493,11 @@ static int select_task_rq_fair(struct ta
+ cpumask_weight(sched_domain_span(sd))))
+ tmp = affine_sd;
+
+- if (tmp)
++ if (tmp) {
++ spin_unlock(&rq->lock);
+ update_shares(tmp);
++ spin_lock(&rq->lock);
++ }
+ }
+
+ if (affine_sd && wake_affine(affine_sd, p, sync)) {
+--- a/kernel/sched_idletask.c
++++ b/kernel/sched_idletask.c
+@@ -6,7 +6,8 @@
+ */
+
+ #ifdef CONFIG_SMP
+-static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
++static int
++select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+ {
+ return task_cpu(p); /* IDLE tasks as never migrated */
+ }
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -942,10 +942,9 @@ static void yield_task_rt(struct rq *rq)
+ #ifdef CONFIG_SMP
+ static int find_lowest_rq(struct task_struct *task);
+
+-static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
++static int
++select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+ {
+- struct rq *rq = task_rq(p);
+-
+ if (sd_flag != SD_BALANCE_WAKE)
+ return smp_processor_id();
+
--- /dev/null
+From efault@gmx.de Fri Sep 17 18:19:56 2010
+From: Mike Galbraith <efault@gmx.de>
+Date: Mon, 4 Jan 2010 14:44:56 +0100
+Subject: sched: Fix vmark regression on big machines
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <c0a4bd155e864c31aa575d64ae6330d563ed03fb.1283514307.git.efault@gmx.de>
+
+From: Mike Galbraith <efault@gmx.de>
+
+commit 50b926e439620c469565e8be0f28be78f5fca1ce upstream
+
+SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't
+enabled, leading to many cache misses on large machines as we traverse
+looking for an idle shared cache to wake to. Change the enabler of
+select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the
+sibling domain level.
+
+Reported-by: Lin Ming <ming.m.lin@intel.com>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <1262612696.15495.15.camel@marge.simson.net>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/topology.h | 2 +-
+ kernel/sched_fair.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/linux/topology.h
++++ b/include/linux/topology.h
+@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void);
+ | 1*SD_WAKE_AFFINE \
+ | 1*SD_SHARE_CPUPOWER \
+ | 0*SD_POWERSAVINGS_BALANCE \
+- | 0*SD_SHARE_PKG_RESOURCES \
++ | 1*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ | 0*SD_PREFER_SIBLING \
+ , \
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1490,7 +1490,7 @@ select_task_rq_fair(struct rq *rq, struc
+ * If there's an idle sibling in this domain, make that
+ * the wake_affine target instead of the current cpu.
+ */
+- if (tmp->flags & SD_PREFER_SIBLING)
++ if (tmp->flags & SD_SHARE_PKG_RESOURCES)
+ target = select_idle_sibling(p, tmp, target);
+
+ if (target >= 0) {
--- /dev/null
+From tglx@linutronix.de Fri Sep 17 18:14:11 2010
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Jan 2010 20:59:01 +0000
+Subject: sched: Implement head queueing for sched_rt
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <06654220e9d17d06d30535777dfbcdf5ab2d7e57.1283514307.git.efault@gmx.de>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 37dad3fce97f01e5149d69de0833d8452c0e862e upstream
+
+The ability to enqueue a task at the head of a SCHED_FIFO priority
+list is required to fix some violations of the POSIX scheduling policy.
+
+Implement the functionality in sched_rt.
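+
+The effect of the head/tail choice can be seen with a small userspace
+FIFO list (illustration only; it uses the BSD TAILQ macros from
+sys/queue.h instead of the kernel's list_add()/list_add_tail(), and the
+task names are invented):
+
+  #include <stdbool.h>
+  #include <stdio.h>
+  #include <sys/queue.h>
+
+  struct task {
+          const char *name;
+          TAILQ_ENTRY(task) run_list;
+  };
+
+  TAILQ_HEAD(run_queue, task);
+
+  /* Ordinary wakeups queue at the tail; a task that must run next again
+   * (as POSIX requires in some situations) is requeued at the head. */
+  static void enqueue(struct run_queue *q, struct task *t, bool head)
+  {
+          if (head)
+                  TAILQ_INSERT_HEAD(q, t, run_list);
+          else
+                  TAILQ_INSERT_TAIL(q, t, run_list);
+  }
+
+  int main(void)
+  {
+          struct run_queue q = TAILQ_HEAD_INITIALIZER(q);
+          struct task a = { .name = "A" }, b = { .name = "B" }, c = { .name = "C" };
+          struct task *t;
+
+          enqueue(&q, &a, false);
+          enqueue(&q, &b, false);
+          enqueue(&q, &c, true);          /* head insertion */
+
+          TAILQ_FOREACH(t, &q, run_list)
+                  printf("%s ", t->name); /* prints: C A B */
+          printf("\n");
+          return 0;
+  }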
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Tested-by: Carsten Emde <cbe@osadl.org>
+Tested-by: Mathias Weber <mathias.weber.mw1@roche.com>
+LKML-Reference: <20100120171629.772169931@linutronix.de>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched_rt.c | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -194,7 +194,7 @@ static inline struct rt_rq *group_rt_rq(
+ return rt_se->my_q;
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+
+ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+@@ -204,7 +204,7 @@ static void sched_rt_rq_enqueue(struct r
+
+ if (rt_rq->rt_nr_running) {
+ if (rt_se && !on_rt_rq(rt_se))
+- enqueue_rt_entity(rt_se);
++ enqueue_rt_entity(rt_se, false);
+ if (rt_rq->highest_prio.curr < curr->prio)
+ resched_task(curr);
+ }
+@@ -803,7 +803,7 @@ void dec_rt_tasks(struct sched_rt_entity
+ dec_rt_group(rt_se, rt_rq);
+ }
+
+-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
++static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+ {
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+ struct rt_prio_array *array = &rt_rq->active;
+@@ -819,7 +819,10 @@ static void __enqueue_rt_entity(struct s
+ if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
+ return;
+
+- list_add_tail(&rt_se->run_list, queue);
++ if (head)
++ list_add(&rt_se->run_list, queue);
++ else
++ list_add_tail(&rt_se->run_list, queue);
+ __set_bit(rt_se_prio(rt_se), array->bitmap);
+
+ inc_rt_tasks(rt_se, rt_rq);
+@@ -856,11 +859,11 @@ static void dequeue_rt_stack(struct sche
+ }
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+ {
+ dequeue_rt_stack(rt_se);
+ for_each_sched_rt_entity(rt_se)
+- __enqueue_rt_entity(rt_se);
++ __enqueue_rt_entity(rt_se, head);
+ }
+
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+@@ -871,7 +874,7 @@ static void dequeue_rt_entity(struct sch
+ struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+ if (rt_rq && rt_rq->rt_nr_running)
+- __enqueue_rt_entity(rt_se);
++ __enqueue_rt_entity(rt_se, false);
+ }
+ }
+
+@@ -886,7 +889,7 @@ enqueue_task_rt(struct rq *rq, struct ta
+ if (wakeup)
+ rt_se->timeout = 0;
+
+- enqueue_rt_entity(rt_se);
++ enqueue_rt_entity(rt_se, head);
+
+ if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
+ enqueue_pushable_task(rq, p);
--- /dev/null
+From oleg@redhat.com Fri Sep 17 18:14:53 2010
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Mon, 15 Mar 2010 10:10:03 +0100
+Subject: sched: Kill the broken and deadlockable cpuset_lock/cpuset_cpus_allowed_locked code
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <2ed3dbb00c3052ccb7ffda1e7a1d112e3d3f53f1.1283514307.git.efault@gmx.de>
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 897f0b3c3ff40b443c84e271bef19bd6ae885195 upstream
+
+This patch just states the fact the cpusets/cpuhotplug interaction is
+broken and removes the deadlockable code which only pretends to work.
+
+- cpuset_lock() doesn't really work. It is needed for
+  cpuset_cpus_allowed_locked(), but we can't take this lock in the
+  try_to_wake_up()->select_fallback_rq() path.
+
+- cpuset_lock() is deadlockable. Suppose that a task T, bound to a CPU,
+  takes callback_mutex. If cpu_down() of that CPU happens before T drops
+  callback_mutex, stop_machine() preempts T, and migration_call(CPU_DEAD)
+  then tries to take cpuset_lock() and hangs forever, because the CPU is
+  already dead and thus T can't be scheduled.
+
+- cpuset_cpus_allowed_locked() is deadlockable too. It takes task_lock(),
+  which is not irq-safe, but try_to_wake_up() can be called from irq
+  context.
+
+Kill them, and change select_fallback_rq() to use cpu_possible_mask, like
+we currently do without CONFIG_CPUSETS.
+
+Also, with or without this patch, with or without CONFIG_CPUSETS, the
+callers of select_fallback_rq() can race with each other or with
+set_cpus_allowed() paths.
+
+The subsequent patches try to fix these problems.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <20100315091003.GA9123@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/cpuset.h | 13 -------------
+ kernel/cpuset.c | 27 +--------------------------
+ kernel/sched.c | 10 +++-------
+ 3 files changed, 4 insertions(+), 46 deletions(-)
+
+--- a/include/linux/cpuset.h
++++ b/include/linux/cpuset.h
+@@ -21,8 +21,6 @@ extern int number_of_cpusets; /* How man
+ extern int cpuset_init(void);
+ extern void cpuset_init_smp(void);
+ extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+-extern void cpuset_cpus_allowed_locked(struct task_struct *p,
+- struct cpumask *mask);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+ #define cpuset_current_mems_allowed (current->mems_allowed)
+ void cpuset_init_current_mems_allowed(void);
+@@ -69,9 +67,6 @@ struct seq_file;
+ extern void cpuset_task_status_allowed(struct seq_file *m,
+ struct task_struct *task);
+
+-extern void cpuset_lock(void);
+-extern void cpuset_unlock(void);
+-
+ extern int cpuset_mem_spread_node(void);
+
+ static inline int cpuset_do_page_mem_spread(void)
+@@ -105,11 +100,6 @@ static inline void cpuset_cpus_allowed(s
+ {
+ cpumask_copy(mask, cpu_possible_mask);
+ }
+-static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
+- struct cpumask *mask)
+-{
+- cpumask_copy(mask, cpu_possible_mask);
+-}
+
+ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
+ {
+@@ -157,9 +147,6 @@ static inline void cpuset_task_status_al
+ {
+ }
+
+-static inline void cpuset_lock(void) {}
+-static inline void cpuset_unlock(void) {}
+-
+ static inline int cpuset_mem_spread_node(void)
+ {
+ return 0;
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -2145,19 +2145,10 @@ void __init cpuset_init_smp(void)
+ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
+ {
+ mutex_lock(&callback_mutex);
+- cpuset_cpus_allowed_locked(tsk, pmask);
+- mutex_unlock(&callback_mutex);
+-}
+-
+-/**
+- * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
+- * Must be called with callback_mutex held.
+- **/
+-void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
+-{
+ task_lock(tsk);
+ guarantee_online_cpus(task_cs(tsk), pmask);
+ task_unlock(tsk);
++ mutex_unlock(&callback_mutex);
+ }
+
+ void cpuset_init_current_mems_allowed(void)
+@@ -2346,22 +2337,6 @@ int __cpuset_node_allowed_hardwall(int n
+ }
+
+ /**
+- * cpuset_lock - lock out any changes to cpuset structures
+- *
+- * The out of memory (oom) code needs to mutex_lock cpusets
+- * from being changed while it scans the tasklist looking for a
+- * task in an overlapping cpuset. Expose callback_mutex via this
+- * cpuset_lock() routine, so the oom code can lock it, before
+- * locking the task list. The tasklist_lock is a spinlock, so
+- * must be taken inside callback_mutex.
+- */
+-
+-void cpuset_lock(void)
+-{
+- mutex_lock(&callback_mutex);
+-}
+-
+-/**
+ * cpuset_unlock - release lock on cpuset changes
+ *
+ * Undo the lock taken in a previous cpuset_lock() call.
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2349,11 +2349,9 @@ static int select_fallback_rq(int cpu, s
+ return dest_cpu;
+
+ /* No more Mr. Nice Guy. */
+- if (dest_cpu >= nr_cpu_ids) {
+- rcu_read_lock();
+- cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+- rcu_read_unlock();
+- dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
++ if (unlikely(dest_cpu >= nr_cpu_ids)) {
++ cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
++ dest_cpu = cpumask_any(cpu_active_mask);
+
+ /*
+ * Don't tell them about moving exiting tasks or
+@@ -7833,7 +7831,6 @@ migration_call(struct notifier_block *nf
+
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+- cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
+ migrate_live_tasks(cpu);
+ rq = cpu_rq(cpu);
+ /* Idle task back to normal (off runqueue, low prio) */
+@@ -7844,7 +7841,6 @@ migration_call(struct notifier_block *nf
+ rq->idle->sched_class = &idle_sched_class;
+ migrate_dead_tasks(cpu);
+ spin_unlock_irq(&rq->lock);
+- cpuset_unlock();
+ migrate_nr_uninterruptible(rq);
+ BUG_ON(rq->nr_running != 0);
+ calc_global_load_remove(rq);
--- /dev/null
+From oleg@redhat.com Fri Sep 17 18:17:45 2010
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Mon, 15 Mar 2010 10:10:27 +0100
+Subject: sched: Make select_fallback_rq() cpuset friendly
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <cfcf4b5d923ac7e65cf0725c08e5ab233634719a.1283514307.git.efault@gmx.de>
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 9084bb8246ea935b98320554229e2f371f7f52fa upstream
+
+Introduce cpuset_cpus_allowed_fallback() helper to fix the cpuset problems
+with select_fallback_rq(). It can be called from any context and can't use
+any cpuset locks including task_lock(). It is called when the task doesn't
+have online cpus in ->cpus_allowed but ttwu/etc must be able to find a
+suitable cpu.
+
+I am not proud of this patch. Everything which needs such a fat comment
+can't be good even if correct. But I'd prefer to not change the locking
+rules in the code I hardly understand, and in any case I believe this
+simple change makes the code much more correct compared to the deadlocks we
+currently have.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <20100315091027.GA9155@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/cpuset.h | 7 +++++++
+ kernel/cpuset.c | 42 ++++++++++++++++++++++++++++++++++++++++++
+ kernel/sched.c | 4 +---
+ 3 files changed, 50 insertions(+), 3 deletions(-)
+
+--- a/include/linux/cpuset.h
++++ b/include/linux/cpuset.h
+@@ -21,6 +21,7 @@ extern int number_of_cpusets; /* How man
+ extern int cpuset_init(void);
+ extern void cpuset_init_smp(void);
+ extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
++extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+ #define cpuset_current_mems_allowed (current->mems_allowed)
+ void cpuset_init_current_mems_allowed(void);
+@@ -101,6 +102,12 @@ static inline void cpuset_cpus_allowed(s
+ cpumask_copy(mask, cpu_possible_mask);
+ }
+
++static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
++{
++ cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
++ return cpumask_any(cpu_active_mask);
++}
++
+ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
+ {
+ return node_possible_map;
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -2151,6 +2151,48 @@ void cpuset_cpus_allowed(struct task_str
+ mutex_unlock(&callback_mutex);
+ }
+
++int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
++{
++ const struct cpuset *cs;
++ int cpu;
++
++ rcu_read_lock();
++ cs = task_cs(tsk);
++ if (cs)
++ cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
++ rcu_read_unlock();
++
++ /*
++ * We own tsk->cpus_allowed, nobody can change it under us.
++ *
++ * But we used cs && cs->cpus_allowed lockless and thus can
++ * race with cgroup_attach_task() or update_cpumask() and get
++ * the wrong tsk->cpus_allowed. However, both cases imply the
++ * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
++ * which takes task_rq_lock().
++ *
++ * If we are called after it dropped the lock we must see all
++ * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
++ * set any mask even if it is not right from task_cs() pov,
++ * the pending set_cpus_allowed_ptr() will fix things.
++ */
++
++ cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
++ if (cpu >= nr_cpu_ids) {
++ /*
++ * Either tsk->cpus_allowed is wrong (see above) or it
++ * is actually empty. The latter case is only possible
++ * if we are racing with remove_tasks_in_empty_cpuset().
++ * Like above we can temporary set any mask and rely on
++ * set_cpus_allowed_ptr() as synchronization point.
++ */
++ cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
++ cpu = cpumask_any(cpu_active_mask);
++ }
++
++ return cpu;
++}
++
+ void cpuset_init_current_mems_allowed(void)
+ {
+ nodes_setall(current->mems_allowed);
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2353,9 +2353,7 @@ static int select_fallback_rq(int cpu, s
+
+ /* No more Mr. Nice Guy. */
+ if (unlikely(dest_cpu >= nr_cpu_ids)) {
+- cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+- dest_cpu = cpumask_any(cpu_active_mask);
+-
++ dest_cpu = cpuset_cpus_allowed_fallback(p);
+ /*
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
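
The helper introduced above is easier to follow when reduced to its mask
arithmetic. Below is a minimal userspace sketch, not kernel code: a 64-bit
word stands in for a cpumask, and first_cpu(), fallback_cpu() and the values
in main() are illustrative assumptions rather than the kernel's API. It shows
the two stages the commit describes: first try allowed-and-active, and only
if that is empty widen the allowed mask and pick any active CPU, relying on a
later set_cpus_allowed_ptr() to repair the mask.

#include <stdint.h>
#include <stdio.h>

static int first_cpu(uint64_t mask)
{
    return mask ? __builtin_ctzll(mask) : -1;   /* -1 plays the nr_cpu_ids role */
}

static int fallback_cpu(uint64_t *allowed, uint64_t active, uint64_t possible)
{
    int cpu = first_cpu(*allowed & active);

    if (cpu < 0) {
        /* No online CPU left in the allowed mask (e.g. its cpuset went
         * empty): temporarily widen it; a pending affinity update is
         * expected to fix things up afterwards. */
        *allowed = possible;
        cpu = first_cpu(active);
    }
    return cpu;
}

int main(void)
{
    uint64_t allowed  = 1ull << 5;  /* bound to CPU 5 only */
    uint64_t active   = 0x0Full;    /* CPUs 0-3 online     */
    uint64_t possible = 0xFFull;    /* CPUs 0-7 exist      */

    printf("fallback cpu: %d\n", fallback_cpu(&allowed, active, possible));
    return 0;
}
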
--- /dev/null
+From a.p.zijlstra@chello.nl Fri Sep 17 18:19:43 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu, 12 Nov 2009 15:55:29 +0100
+Subject: sched: More generic WAKE_AFFINE vs select_idle_sibling()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <4fe736bd5f08977bf198f67dd272162a061c1a02.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit fe3bcfe1f6c1fc4ea7706ac2d05e579fd9092682 upstream
+
+Instead of only considering SD_WAKE_AFFINE | SD_PREFER_SIBLING
+domains, also allow all SD_PREFER_SIBLING domains below a
+SD_WAKE_AFFINE domain to change the affinity target.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Mike Galbraith <efault@gmx.de>
+LKML-Reference: <20091112145610.909723612@chello.nl>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched_fair.c | 33 ++++++++++++++++-----------------
+ 1 file changed, 16 insertions(+), 17 deletions(-)
+
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1398,20 +1398,16 @@ select_idle_sibling(struct task_struct *
+ * test in select_task_rq_fair) and the prev_cpu is idle then that's
+ * always a better target than the current cpu.
+ */
+- if (target == cpu) {
+- if (!cpu_rq(prev_cpu)->cfs.nr_running)
+- target = prev_cpu;
+- }
++ if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running)
++ return prev_cpu;
+
+ /*
+ * Otherwise, iterate the domain and find an elegible idle cpu.
+ */
+- if (target == -1 || target == cpu) {
+- for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+- if (!cpu_rq(i)->cfs.nr_running) {
+- target = i;
+- break;
+- }
++ for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
++ if (!cpu_rq(i)->cfs.nr_running) {
++ target = i;
++ break;
+ }
+ }
+
+@@ -1475,7 +1471,12 @@ select_task_rq_fair(struct rq *rq, struc
+ want_sd = 0;
+ }
+
+- if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) {
++ /*
++ * While iterating the domains looking for a spanning
++ * WAKE_AFFINE domain, adjust the affine target to any idle cpu
++ * in cache sharing domains along the way.
++ */
++ if (want_affine) {
+ int target = -1;
+
+ /*
+@@ -1488,17 +1489,15 @@ select_task_rq_fair(struct rq *rq, struc
+ /*
+ * If there's an idle sibling in this domain, make that
+ * the wake_affine target instead of the current cpu.
+- *
+- * XXX: should we possibly do this outside of
+- * WAKE_AFFINE, in case the shared cache domain is
+- * smaller than the WAKE_AFFINE domain?
+ */
+ if (tmp->flags & SD_PREFER_SIBLING)
+ target = select_idle_sibling(p, tmp, target);
+
+ if (target >= 0) {
+- affine_sd = tmp;
+- want_affine = 0;
++ if (tmp->flags & SD_WAKE_AFFINE) {
++ affine_sd = tmp;
++ want_affine = 0;
++ }
+ cpu = target;
+ }
+ }
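
The change above amounts to a different commit rule for the domain walk: any
cache-sharing (SD_PREFER_SIBLING) level may propose an idle CPU as the affine
target, but the target is only committed once a level that is also
SD_WAKE_AFFINE is reached. Here is a toy model of that walk; the struct, flag
names and find-idle logic are invented for illustration, assuming a simple
parent-linked chain of domains, and are not the scheduler's data structures.

#include <stdio.h>

#define FLAG_PREFER_SIBLING 0x1
#define FLAG_WAKE_AFFINE    0x2

struct domain {
    const char *name;
    int flags;
    int idle_cpu;               /* -1 if no idle CPU in this span */
    struct domain *parent;
};

static int pick_affine_target(struct domain *d, int cur_cpu)
{
    int target = -1;

    for (; d; d = d->parent) {
        if ((d->flags & FLAG_PREFER_SIBLING) && d->idle_cpu >= 0)
            target = d->idle_cpu;             /* tentative target */
        if (target >= 0 && (d->flags & FLAG_WAKE_AFFINE))
            return target;                    /* commit at the spanning level */
    }
    return cur_cpu;                           /* fall back to the current CPU */
}

int main(void)
{
    struct domain pkg = { "package", FLAG_WAKE_AFFINE, -1, NULL };
    struct domain smt = { "smt", FLAG_PREFER_SIBLING, 3, &pkg };

    printf("affine target: %d\n", pick_affine_target(&smt, 0));
    return 0;
}
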
--- /dev/null
+From oleg@redhat.com Fri Sep 17 18:15:27 2010
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Mon, 15 Mar 2010 10:10:14 +0100
+Subject: sched: move_task_off_dead_cpu(): Remove retry logic
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <d61f978b6a63cf12e26234bf81629a001c2221d0.1283514307.git.efault@gmx.de>
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit c1804d547dc098363443667609c272d1e4d15ee8 upstream
+
+The previous patch preserved the retry logic, but it looks unneeded.
+
+__migrate_task() can only fail if we raced with migration after we dropped
+the lock, but in this case the caller of set_cpus_allowed/etc must initiate
+migration itself if ->on_rq == T.
+
+We already fixed p->cpus_allowed, and the changes in active/online masks must
+be visible to the racer, which should migrate the task to an online cpu
+correctly.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <20100315091014.GA9138@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -7407,7 +7407,7 @@ static void move_task_off_dead_cpu(int d
+ struct rq *rq = cpu_rq(dead_cpu);
+ int needs_cpu, uninitialized_var(dest_cpu);
+ unsigned long flags;
+-again:
++
+ local_irq_save(flags);
+
+ spin_lock(&rq->lock);
+@@ -7415,14 +7415,13 @@ again:
+ if (needs_cpu)
+ dest_cpu = select_fallback_rq(dead_cpu, p);
+ spin_unlock(&rq->lock);
+-
+- /* It can have affinity changed while we were choosing. */
++ /*
++ * It can only fail if we race with set_cpus_allowed(),
++ * in the racer should migrate the task anyway.
++ */
+ if (needs_cpu)
+- needs_cpu = !__migrate_task(p, dead_cpu, dest_cpu);
++ __migrate_task(p, dead_cpu, dest_cpu);
+ local_irq_restore(flags);
+-
+- if (unlikely(needs_cpu))
+- goto again;
+ }
+
+ /*
--- /dev/null
+From oleg@redhat.com Fri Sep 17 18:15:12 2010
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Mon, 15 Mar 2010 10:10:10 +0100
+Subject: sched: move_task_off_dead_cpu(): Take rq->lock around select_fallback_rq()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <f0c871a27f468c7e4c8cbe43a79f506dc323b9b6.1283514307.git.efault@gmx.de>
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 1445c08d06c5594895b4fae952ef8a457e89c390 upstream
+
+move_task_off_dead_cpu()->select_fallback_rq() reads/updates ->cpus_allowed
+lockless. We can race with set_cpus_allowed() running in parallel.
+
+Change it to take rq->lock around select_fallback_rq(). Note that it is not
+trivial to move this spin_lock() into select_fallback_rq(): we must recheck
+that the task was not migrated after we take the lock, and other callers do
+not need this lock.
+
+To avoid the races with other callers of select_fallback_rq() which rely on
+TASK_WAKING, we also check p->state != TASK_WAKING and do nothing otherwise.
+The owner of TASK_WAKING must update ->cpus_allowed and choose the correct
+CPU anyway, and the subsequent __migrate_task() is just meaningless because
+p->se.on_rq must be false.
+
+Alternatively, we could change select_task_rq() to take rq->lock right
+after it calls sched_class->select_task_rq(), but this looks a bit ugly.
+
+Also, change it to not assume irqs are disabled and absorb __migrate_task_irq().
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <20100315091010.GA9131@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c | 30 +++++++++++++++---------------
+ 1 file changed, 15 insertions(+), 15 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -7399,29 +7399,29 @@ static int migration_thread(void *data)
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+-
+-static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
+-{
+- int ret;
+-
+- local_irq_disable();
+- ret = __migrate_task(p, src_cpu, dest_cpu);
+- local_irq_enable();
+- return ret;
+-}
+-
+ /*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ */
+ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+ {
+- int dest_cpu;
+-
++ struct rq *rq = cpu_rq(dead_cpu);
++ int needs_cpu, uninitialized_var(dest_cpu);
++ unsigned long flags;
+ again:
+- dest_cpu = select_fallback_rq(dead_cpu, p);
++ local_irq_save(flags);
++
++ spin_lock(&rq->lock);
++ needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
++ if (needs_cpu)
++ dest_cpu = select_fallback_rq(dead_cpu, p);
++ spin_unlock(&rq->lock);
+
+ /* It can have affinity changed while we were choosing. */
+- if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
++ if (needs_cpu)
++ needs_cpu = !__migrate_task(p, dead_cpu, dest_cpu);
++ local_irq_restore(flags);
++
++ if (unlikely(needs_cpu))
+ goto again;
+ }
+
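
The fixed function has a common shape: sample the condition and compute the
decision while holding the queue lock, then perform the heavier action after
dropping it, and only if the sampled condition held. A hedged pthread sketch
of just that shape follows; struct task, move_off_dead_cpu(), the waking flag
and the plain final assignment are deliberate simplifications, not the
kernel's code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct task {
    int cpu;
    bool waking;                  /* stands in for p->state == TASK_WAKING */
};

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

static void move_off_dead_cpu(struct task *t, int dead_cpu, int fallback)
{
    bool needs_cpu;
    int dest = -1;

    pthread_mutex_lock(&rq_lock);
    needs_cpu = (t->cpu == dead_cpu) && !t->waking;
    if (needs_cpu)
        dest = fallback;          /* destination chosen while the lock is held */
    pthread_mutex_unlock(&rq_lock);

    if (needs_cpu)
        t->cpu = dest;            /* the "migrate" step, outside the lock */
}

int main(void)
{
    struct task t = { .cpu = 2, .waking = false };

    move_off_dead_cpu(&t, 2, 0);
    printf("task now on cpu %d\n", t.cpu);
    return 0;
}
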
--- /dev/null
+From a.p.zijlstra@chello.nl Fri Sep 17 18:18:19 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Thu, 25 Mar 2010 21:05:16 +0100
+Subject: sched: Optimize task_rq_lock()
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <abb25d422e8ff6033d0feee1ae9a47377ed5df8e.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 65cc8e4859ff29a9ddc989c88557d6059834c2a2 upstream
+
+Now that we hold the rq->lock over set_task_cpu() again, we can do
+away with most of the TASK_WAKING checks and reduce them again to
+set_cpus_allowed_ptr().
+
+Removes some conditionals from scheduling hot-paths.
+
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Oleg Nesterov <oleg@redhat.com>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -942,8 +942,8 @@ static inline void finish_lock_switch(st
+ #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+ /*
+- * Check whether the task is waking, we use this to synchronize against
+- * ttwu() so that task_cpu() reports a stable number.
++ * Check whether the task is waking, we use this to synchronize ->cpus_allowed
++ * against ttwu().
+ */
+ static inline int task_is_waking(struct task_struct *p)
+ {
+@@ -960,11 +960,9 @@ static inline struct rq *__task_rq_lock(
+ struct rq *rq;
+
+ for (;;) {
+- while (task_is_waking(p))
+- cpu_relax();
+ rq = task_rq(p);
+ spin_lock(&rq->lock);
+- if (likely(rq == task_rq(p) && !task_is_waking(p)))
++ if (likely(rq == task_rq(p)))
+ return rq;
+ spin_unlock(&rq->lock);
+ }
+@@ -981,12 +979,10 @@ static struct rq *task_rq_lock(struct ta
+ struct rq *rq;
+
+ for (;;) {
+- while (task_is_waking(p))
+- cpu_relax();
+ local_irq_save(*flags);
+ rq = task_rq(p);
+ spin_lock(&rq->lock);
+- if (likely(rq == task_rq(p) && !task_is_waking(p)))
++ if (likely(rq == task_rq(p)))
+ return rq;
+ spin_unlock_irqrestore(&rq->lock, *flags);
+ }
+@@ -7213,7 +7209,18 @@ int set_cpus_allowed_ptr(struct task_str
+ struct rq *rq;
+ int ret = 0;
+
++ /*
++ * Serialize against TASK_WAKING so that ttwu() and wunt() can
++ * drop the rq->lock and still rely on ->cpus_allowed.
++ */
++again:
++ while (task_is_waking(p))
++ cpu_relax();
+ rq = task_rq_lock(p, &flags);
++ if (task_is_waking(p)) {
++ task_rq_unlock(rq, &flags);
++ goto again;
++ }
+
+ if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+ ret = -EINVAL;
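
With the waking checks moved out, __task_rq_lock()/task_rq_lock() are back to
the classic "lock whatever queue the task points at, then re-check it still
points there" loop. A small userspace illustration of that revalidation
pattern, assuming a toy fixed array of queues and a qidx field in place of
task_rq(); none of these names are the kernel's.

#include <pthread.h>
#include <stdio.h>

#define NR_QUEUES 2

struct queue {
    pthread_mutex_t lock;
};

struct task {
    int qidx;                           /* which queue the task is on */
};

static struct queue queues[NR_QUEUES] = {
    { PTHREAD_MUTEX_INITIALIZER },
    { PTHREAD_MUTEX_INITIALIZER },
};

/* Returns with the task's current queue locked. */
static struct queue *task_queue_lock(struct task *t)
{
    for (;;) {
        struct queue *q = &queues[t->qidx];

        pthread_mutex_lock(&q->lock);
        if (q == &queues[t->qidx])      /* still the right queue? */
            return q;
        pthread_mutex_unlock(&q->lock); /* it moved underneath us: retry */
    }
}

int main(void)
{
    struct task t = { .qidx = 1 };
    struct queue *q = task_queue_lock(&t);

    printf("locked queue %ld\n", (long)(q - queues));
    pthread_mutex_unlock(&q->lock);
    return 0;
}
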
--- /dev/null
+From a.p.zijlstra@chello.nl Fri Sep 17 18:20:23 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Fri, 16 Apr 2010 14:59:29 +0200
+Subject: sched: Pre-compute cpumask_weight(sched_domain_span(sd))
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <0c6c762bcaa163e06a13da32043ad968d1473188.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit 669c55e9f99b90e46eaa0f98a67ec53d46dc969a upstream
+
+Dave reported that his large SPARC machines spend lots of time in
+hweight64(); try to optimize some of those needless cpumask_weight()
+invocations (especially with the large offstack cpumasks, these are very
+expensive indeed).
+
+Reported-by: David Miller <davem@davemloft.net>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ include/linux/sched.h | 1 +
+ kernel/sched.c | 7 +++++--
+ kernel/sched_fair.c | 8 +++-----
+ 3 files changed, 9 insertions(+), 7 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1000,6 +1000,7 @@ struct sched_domain {
+ char *name;
+ #endif
+
++ unsigned int span_weight;
+ /*
+ * Span of all CPUs in this domain.
+ *
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -3678,7 +3678,7 @@ unsigned long __weak arch_scale_freq_pow
+
+ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = cpumask_weight(sched_domain_span(sd));
++ unsigned long weight = sd->span_weight;
+ unsigned long smt_gain = sd->smt_gain;
+
+ smt_gain /= weight;
+@@ -3711,7 +3711,7 @@ unsigned long scale_rt_power(int cpu)
+
+ static void update_cpu_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = cpumask_weight(sched_domain_span(sd));
++ unsigned long weight = sd->span_weight;
+ unsigned long power = SCHED_LOAD_SCALE;
+ struct sched_group *sdg = sd->groups;
+
+@@ -8166,6 +8166,9 @@ cpu_attach_domain(struct sched_domain *s
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_domain *tmp;
+
++ for (tmp = sd; tmp; tmp = tmp->parent)
++ tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
++
+ /* Remove the sched domains which do not contribute to scheduling. */
+ for (tmp = sd; tmp; ) {
+ struct sched_domain *parent = tmp->parent;
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1520,9 +1520,7 @@ select_task_rq_fair(struct rq *rq, struc
+ * Pick the largest domain to update shares over
+ */
+ tmp = sd;
+- if (affine_sd && (!tmp ||
+- cpumask_weight(sched_domain_span(affine_sd)) >
+- cpumask_weight(sched_domain_span(sd))))
++ if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
+ tmp = affine_sd;
+
+ if (tmp) {
+@@ -1566,10 +1564,10 @@ select_task_rq_fair(struct rq *rq, struc
+
+ /* Now try balancing at a lower domain level of new_cpu */
+ cpu = new_cpu;
+- weight = cpumask_weight(sched_domain_span(sd));
++ weight = sd->span_weight;
+ sd = NULL;
+ for_each_domain(cpu, tmp) {
+- if (weight <= cpumask_weight(sched_domain_span(tmp)))
++ if (weight <= tmp->span_weight)
+ break;
+ if (tmp->flags & sd_flag)
+ sd = tmp;
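
The optimization is simply "compute the population count once, when the
domain tree is (re)built, and read a cached integer on every hot-path use".
A minimal sketch under the assumption of a 64-CPU toy mask; attach_domain()
and the struct here mirror the idea, not the kernel's sched_domain.

#include <stdint.h>
#include <stdio.h>

struct domain {
    uint64_t span;                /* toy 64-CPU mask                     */
    unsigned int span_weight;     /* cached popcount, set at attach time */
};

static void attach_domain(struct domain *d, uint64_t span)
{
    d->span = span;
    d->span_weight = (unsigned int)__builtin_popcountll(span);
}

int main(void)
{
    struct domain d;

    attach_domain(&d, 0x00FFull);             /* CPUs 0-7 */
    /* hot path: just read the cached field, no hweight64() per call */
    printf("span weight = %u\n", d.span_weight);
    return 0;
}
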
--- /dev/null
+From tglx@linutronix.de Fri Sep 17 18:14:25 2010
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 20 Jan 2010 20:59:06 +0000
+Subject: sched: Queue a deboosted task to the head of the RT prio queue
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <55050ebe52e5ca5834a6f847d19809cba5dc10a0.1283514307.git.efault@gmx.de>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 60db48cacb9b253d5607a5ff206112a59cd09e34 upstream
+
+rt_mutex_setprio() is used to implement priority inheritance for
+futexes. When a task is deboosted it gets enqueued at the tail of its
+RT priority list. This is violating the POSIX scheduling semantics:
+
+rt priority list X contains two runnable tasks A and B
+
+task A runs with priority X and holds mutex M
+task C preempts A and is blocked on mutex M
+ -> task A is boosted to priority of task C (Y)
+task A unlocks the mutex M and deboosts itself
+ -> A is dequeued from rt priority list Y
+ -> A is enqueued to the tail of rt priority list X
+task C schedules away
+task B runs
+
+This is wrong as task A did not schedule away and therefore violates
+the POSIX scheduling semantics.
+
+Enqueue the task to the head of the priority list instead.
+
+Reported-by: Mathias Weber <mathias.weber.mw1@roche.com>
+Reported-by: Carsten Emde <cbe@osadl.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Tested-by: Carsten Emde <cbe@osadl.org>
+Tested-by: Mathias Weber <mathias.weber.mw1@roche.com>
+LKML-Reference: <20100120171629.809074113@linutronix.de>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -6184,7 +6184,7 @@ void rt_mutex_setprio(struct task_struct
+ if (running)
+ p->sched_class->set_curr_task(rq);
+ if (on_rq) {
+- enqueue_task(rq, p, 0, false);
++ enqueue_task(rq, p, 0, oldprio < prio);
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
+ }
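
The one-character change reads better next to a toy priority list:
head-queueing on deboost keeps task A ahead of task B, exactly as in the
scenario above, whereas tail-queueing would let B run first. The enqueue()
below and its head flag are illustrative stand-ins (lower numeric value
meaning higher priority, as in the kernel), not the scheduler's enqueue path.

#include <stdio.h>
#include <string.h>

#define MAXQ 8

struct prio_list {
    const char *task[MAXQ];
    int n;
};

static void enqueue(struct prio_list *q, const char *t, int head)
{
    if (q->n == MAXQ)
        return;
    if (head) {
        memmove(&q->task[1], &q->task[0], q->n * sizeof(q->task[0]));
        q->task[0] = t;
    } else {
        q->task[q->n] = t;
    }
    q->n++;
}

int main(void)
{
    struct prio_list q = { .n = 0 };
    int oldprio = 10, prio = 20;            /* lower value = higher priority */

    enqueue(&q, "B", 0);
    /* A was boosted (oldprio 10) and now drops back to prio 20:
     * oldprio < prio, so it is queued at the head, still ahead of B. */
    enqueue(&q, "A", oldprio < prio);

    printf("next to run: %s\n", q.task[0]); /* prints "A" */
    return 0;
}
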
--- /dev/null
+From a.p.zijlstra@chello.nl Fri Sep 17 18:19:01 2010
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Date: Tue, 1 Dec 2009 12:21:47 +0100
+Subject: sched: Remove unnecessary RCU exclusion
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <96e351935dd8b98a2e436bf3e254fa3d91f4bd2d.1283514307.git.efault@gmx.de>
+
+From: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+commit fb58bac5c75bfff8bbf7d02071a10a62f32fe28b upstream
+
+As Nick pointed out, and as I realized myself when doing:
+ sched: Fix balance vs hotplug race
+the patch:
+ sched: for_each_domain() vs RCU
+
+is wrong: sched_domains are freed after synchronize_sched(), which
+means disabling preemption is enough.
+
+Reported-by: Nick Piggin <npiggin@suse.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <new-submission>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched_fair.c | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1410,7 +1410,6 @@ select_task_rq_fair(struct rq *rq, struc
+ new_cpu = prev_cpu;
+ }
+
+- rcu_read_lock();
+ for_each_domain(cpu, tmp) {
+ if (!(tmp->flags & SD_LOAD_BALANCE))
+ continue;
+@@ -1500,10 +1499,8 @@ select_task_rq_fair(struct rq *rq, struc
+ }
+ }
+
+- if (affine_sd && wake_affine(affine_sd, p, sync)) {
+- new_cpu = cpu;
+- goto out;
+- }
++ if (affine_sd && wake_affine(affine_sd, p, sync))
++ return cpu;
+
+ while (sd) {
+ int load_idx = sd->forkexec_idx;
+@@ -1544,8 +1541,6 @@ select_task_rq_fair(struct rq *rq, struc
+ /* while loop will break here if sd == NULL */
+ }
+
+-out:
+- rcu_read_unlock();
+ return new_cpu;
+ }
+ #endif /* CONFIG_SMP */
--- /dev/null
+From 30da688ef6b76e01969b00608202fff1eed2accc Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Mon, 15 Mar 2010 10:10:19 +0100
+Subject: sched: sched_exec(): Remove the select_fallback_rq() logic
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 30da688ef6b76e01969b00608202fff1eed2accc upstream.
+
+sched_exec()->select_task_rq() reads/updates ->cpus_allowed lockless.
+This can race with other CPUs updating our ->cpus_allowed, and this
+looks meaningless to me.
+
+The task is current and running, so it must have online cpus in ->cpus_allowed
+and the fallback mode is bogus. And if ->sched_class returns the "wrong" cpu,
+this likely means we raced with set_cpus_allowed(), which was called
+for a reason; why should sched_exec() retry and call ->select_task_rq()
+again?
+
+Change the code to call sched_class->select_task_rq() directly and do
+nothing if the returned cpu is wrong after re-checking under rq->lock.
+
+From now on, task_struct->cpus_allowed is always stable under TASK_WAKING:
+select_fallback_rq() is always called either under rq->lock or by a caller
+that owns TASK_WAKING (select_task_rq).
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <20100315091019.GA9141@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c | 25 ++++++++-----------------
+ 1 file changed, 8 insertions(+), 17 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2333,6 +2333,9 @@ void task_oncpu_function_call(struct tas
+ }
+
+ #ifdef CONFIG_SMP
++/*
++ * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
++ */
+ static int select_fallback_rq(int cpu, struct task_struct *p)
+ {
+ int dest_cpu;
+@@ -2369,12 +2372,7 @@ static int select_fallback_rq(int cpu, s
+ }
+
+ /*
+- * Gets called from 3 sites (exec, fork, wakeup), since it is called without
+- * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done
+- * by:
+- *
+- * exec: is unstable, retry loop
+- * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING
++ * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
+ */
+ static inline
+ int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+@@ -3223,9 +3221,8 @@ void sched_exec(void)
+ unsigned long flags;
+ struct rq *rq;
+
+-again:
+ this_cpu = get_cpu();
+- dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
++ dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
+ if (dest_cpu == this_cpu) {
+ put_cpu();
+ return;
+@@ -3233,18 +3230,12 @@ again:
+
+ rq = task_rq_lock(p, &flags);
+ put_cpu();
+-
+ /*
+ * select_task_rq() can race against ->cpus_allowed
+ */
+- if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
+- || unlikely(!cpu_active(dest_cpu))) {
+- task_rq_unlock(rq, &flags);
+- goto again;
+- }
+-
+- /* force the process onto the specified CPU */
+- if (migrate_task(p, dest_cpu, &req)) {
++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
++ likely(cpu_active(dest_cpu)) &&
++ migrate_task(p, dest_cpu, &req)) {
+ /* Need to wait for migration thread (might exit: take ref). */
+ struct task_struct *mt = rq->migration_thread;
+
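
The rewritten sched_exec() follows a "compute a hint locklessly, validate it
under the lock, and silently do nothing if the hint went stale" pattern
instead of retrying. Here is a hedged userspace sketch of that pattern; the
invented try_balance_exec() and the 64-bit mask standing in for cpumasks are
assumptions for illustration only.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

struct task {
    uint64_t allowed;               /* may change concurrently */
    int cpu;
};

static bool try_balance_exec(struct task *t, int dest, uint64_t active)
{
    bool moved = false;

    pthread_mutex_lock(&rq_lock);
    if ((t->allowed & (1ull << dest)) && (active & (1ull << dest))) {
        t->cpu = dest;              /* the migrate step */
        moved = true;
    }
    pthread_mutex_unlock(&rq_lock);
    return moved;                   /* false: we raced, the racer handles placement */
}

int main(void)
{
    struct task t = { .allowed = 0x0Full, .cpu = 3 };
    int dest = 1;                   /* chosen locklessly, e.g. least-loaded CPU */
    bool moved = try_balance_exec(&t, dest, 0x0Full);

    printf("moved: %d, cpu: %d\n", moved, t.cpu);
    return 0;
}
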
--- /dev/null
+From oleg@redhat.com Fri Sep 17 18:14:40 2010
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Tue, 30 Mar 2010 18:58:29 +0200
+Subject: sched: set_cpus_allowed_ptr(): Don't use rq->migration_thread after unlock
+To: stable <stable@kernel.org>
+Cc: Ingo Molnar <mingo@elte.hu>, Peter Zijlstra <a.p.zijlstra@chello.nl>, Greg KH <greg@kroah.com>
+Message-ID: <bb11665c972dd1d8ad681538e851ed2d9cc6741d.1283514307.git.efault@gmx.de>
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 47a70985e5c093ae03d8ccf633c70a93761d86f2 upstream
+
+Trivial typo fix. rq->migration_thread can be NULL after
+task_rq_unlock(); this is why we have "mt", which should be
+used instead.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+LKML-Reference: <20100330165829.GA18284@redhat.com>
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ kernel/sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -7273,7 +7273,7 @@ int set_cpus_allowed_ptr(struct task_str
+
+ get_task_struct(mt);
+ task_rq_unlock(rq, &flags);
+- wake_up_process(rq->migration_thread);
++ wake_up_process(mt);
+ put_task_struct(mt);
+ wait_for_completion(&req.done);
+ tlb_migrate_finish(p->mm);
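
The rule the fix restores: anything that can be torn down once the lock is
dropped must be copied (and, in the kernel, pinned with get_task_struct())
into a local while the lock is still held, and only the local may be used
afterwards. A small illustrative sketch of that discipline, with made-up
names and a plain pointer copy in place of reference counting.

#include <pthread.h>
#include <stdio.h>

struct runqueue {
    pthread_mutex_t lock;
    const char *migration_thread;   /* may become NULL once unlocked */
};

static void kick_migration(struct runqueue *rq)
{
    const char *mt;

    pthread_mutex_lock(&rq->lock);
    mt = rq->migration_thread;      /* take our own copy/reference here */
    pthread_mutex_unlock(&rq->lock);

    if (mt)
        printf("waking %s\n", mt);  /* use the local copy, never rq->... */
}

int main(void)
{
    struct runqueue rq = { PTHREAD_MUTEX_INITIALIZER, "migration/0" };

    kick_migration(&rq);
    return 0;
}
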
sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch
sched-fix-hotplug-hang.patch
sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch
+sched-fix-incorrect-sanity-check.patch
+sched-fix-race-between-ttwu-and-task_rq_lock.patch
+sched-extend-enqueue_task-to-allow-head-queueing.patch
+sched-implement-head-queueing-for-sched_rt.patch
+sched-queue-a-deboosted-task-to-the-head-of-the-rt-prio-queue.patch
+sched-set_cpus_allowed_ptr-don-t-use-rq-migration_thread-after-unlock.patch
+sched-kill-the-broken-and-deadlockable-cpuset_lock-cpuset_cpus_allowed_locked-code.patch
+sched-move_task_off_dead_cpu-take-rq-lock-around-select_fallback_rq.patch
+sched-move_task_off_dead_cpu-remove-retry-logic.patch
+sched-sched_exec-remove-the-select_fallback_rq-logic.patch
+sched-_cpu_down-don-t-play-with-current-cpus_allowed.patch
+sched-make-select_fallback_rq-cpuset-friendly.patch
+sched-fix-task_waking-vs-fork-deadlock.patch
+sched-optimize-task_rq_lock.patch
+sched-fix-nr_uninterruptible-count.patch
+sched-fix-rq-clock-synchronization-when-migrating-tasks.patch
+sched-remove-unnecessary-rcu-exclusion.patch
+sched-apply-rcu-protection-to-wake_affine.patch
+sched-cleanup-select_task_rq_fair.patch
+sched-more-generic-wake_affine-vs-select_idle_sibling.patch
+sched-fix-vmark-regression-on-big-machines.patch
+sched-fix-select_idle_sibling.patch
+sched-pre-compute-cpumask_weight-sched_domain_span-sd.patch
+sched-fix-select_idle_sibling-logic-in-select_task_rq_fair.patch
+sched-cpuacct-use-bigger-percpu-counter-batch-values-for-stats-counters.patch