From: Greg Kroah-Hartman Date: Sat, 18 Sep 2010 01:21:53 +0000 (-0700) Subject: .32 patches X-Git-Tag: v2.6.27.54~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6d5af50e8b296218fe17ac59b450b98839b2a780;p=thirdparty%2Fkernel%2Fstable-queue.git .32 patches --- diff --git a/queue-2.6.32/sched-_cpu_down-don-t-play-with-current-cpus_allowed.patch b/queue-2.6.32/sched-_cpu_down-don-t-play-with-current-cpus_allowed.patch new file mode 100644 index 00000000000..5563af3e14a --- /dev/null +++ b/queue-2.6.32/sched-_cpu_down-don-t-play-with-current-cpus_allowed.patch @@ -0,0 +1,141 @@ +From oleg@redhat.com Fri Sep 17 18:17:33 2010 +From: Oleg Nesterov +Date: Mon, 15 Mar 2010 10:10:23 +0100 +Subject: sched: _cpu_down(): Don't play with current->cpus_allowed +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <6ee57a0da8d81973a62d3c1ce12c5c96e2634b04.1283514307.git.efault@gmx.de> + +From: Oleg Nesterov + +commit 6a1bdc1b577ebcb65f6603c57f8347309bc4ab13 upstream + +_cpu_down() changes the current task's affinity and then recovers it at +the end. The problems are well known: we can't restore old_allowed if it +was bound to the now-dead-cpu, and we can race with the userspace which +can change cpu-affinity during unplug. + +_cpu_down() should not play with current->cpus_allowed at all. Instead, +take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable() +removes the dying cpu from cpu_online_mask. + +Signed-off-by: Oleg Nesterov +Acked-by: Rafael J. Wysocki +Signed-off-by: Peter Zijlstra +LKML-Reference: <20100315091023.GA9148@redhat.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 1 + + kernel/cpu.c | 18 ++++++------------ + kernel/sched.c | 2 +- + 3 files changed, 8 insertions(+), 13 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1887,6 +1887,7 @@ extern void sched_clock_idle_sleep_event + extern void sched_clock_idle_wakeup_event(u64 delta_ns); + + #ifdef CONFIG_HOTPLUG_CPU ++extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p); + extern void idle_task_exit(void); + #else + static inline void idle_task_exit(void) {} +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -163,6 +163,7 @@ static inline void check_for_tasks(int c + } + + struct take_cpu_down_param { ++ struct task_struct *caller; + unsigned long mod; + void *hcpu; + }; +@@ -171,6 +172,7 @@ struct take_cpu_down_param { + static int __ref take_cpu_down(void *_param) + { + struct take_cpu_down_param *param = _param; ++ unsigned int cpu = (unsigned long)param->hcpu; + int err; + + /* Ensure this CPU doesn't handle any more interrupts. */ +@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa + raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod, + param->hcpu); + ++ if (task_cpu(param->caller) == cpu) ++ move_task_off_dead_cpu(cpu, param->caller); + /* Force idle task to run as soon as we yield: it should + immediately notice cpu is offline and die quickly. */ + sched_idle_next(); +@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa + static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) + { + int err, nr_calls = 0; +- cpumask_var_t old_allowed; + void *hcpu = (void *)(long)cpu; + unsigned long mod = tasks_frozen ? 
CPU_TASKS_FROZEN : 0; + struct take_cpu_down_param tcd_param = { ++ .caller = current, + .mod = mod, + .hcpu = hcpu, + }; +@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int + if (!cpu_online(cpu)) + return -EINVAL; + +- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL)) +- return -ENOMEM; +- + cpu_hotplug_begin(); + set_cpu_active(cpu, false); + err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, +@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int + goto out_release; + } + +- /* Ensure that we are not runnable on dying cpu */ +- cpumask_copy(old_allowed, ¤t->cpus_allowed); +- set_cpus_allowed_ptr(current, cpu_active_mask); +- + err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); + if (err) { + set_cpu_active(cpu, true); +@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int + hcpu) == NOTIFY_BAD) + BUG(); + +- goto out_allowed; ++ goto out_release; + } + BUG_ON(cpu_online(cpu)); + +@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int + + check_for_tasks(cpu); + +-out_allowed: +- set_cpus_allowed_ptr(current, old_allowed); + out_release: + cpu_hotplug_done(); + if (!err) { +@@ -263,7 +258,6 @@ out_release: + hcpu) == NOTIFY_BAD) + BUG(); + } +- free_cpumask_var(old_allowed); + return err; + } + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -7393,7 +7393,7 @@ static int migration_thread(void *data) + /* + * Figure out where task on dead CPU should go, use force if necessary. + */ +-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) ++void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) + { + struct rq *rq = cpu_rq(dead_cpu); + int needs_cpu, uninitialized_var(dest_cpu); diff --git a/queue-2.6.32/sched-apply-rcu-protection-to-wake_affine.patch b/queue-2.6.32/sched-apply-rcu-protection-to-wake_affine.patch new file mode 100644 index 00000000000..01e24df4888 --- /dev/null +++ b/queue-2.6.32/sched-apply-rcu-protection-to-wake_affine.patch @@ -0,0 +1,53 @@ +From daniel.blueman@gmail.com Fri Sep 17 18:19:12 2010 +From: Daniel J Blueman +Date: Tue, 1 Jun 2010 14:06:13 +0100 +Subject: sched: apply RCU protection to wake_affine() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Daniel J Blueman + +commit f3b577dec1f2ce32d2db6d2ca6badff7002512af upstream + +The task_group() function returns a pointer that must be protected +by either RCU, the ->alloc_lock, or the cgroup lock (see the +rcu_dereference_check() in task_subsys_state(), which is invoked by +task_group()). The wake_affine() function currently does none of these, +which means that a concurrent update would be within its rights to free +the structure returned by task_group(). Because wake_affine() uses this +structure only to compute load-balancing heuristics, there is no reason +to acquire either of the two locks. + +Therefore, this commit introduces an RCU read-side critical section that +starts before the first call to task_group() and ends after the last use +of the "tg" pointer returned from task_group(). Thanks to Li Zefan for +pointing out the need to extend the RCU read-side critical section from +that proposed by the original patch. + +Signed-off-by: Daniel J Blueman +Signed-off-by: Paul E. 
McKenney +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched_fair.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1250,6 +1250,7 @@ static int wake_affine(struct sched_doma + * effect of the currently running task from the load + * of the current CPU: + */ ++ rcu_read_lock(); + if (sync) { + tg = task_group(current); + weight = current->se.load.weight; +@@ -1275,6 +1276,7 @@ static int wake_affine(struct sched_doma + balanced = !this_load || + 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= + imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); ++ rcu_read_unlock(); + + /* + * If the currently running task will sleep within diff --git a/queue-2.6.32/sched-cleanup-select_task_rq_fair.patch b/queue-2.6.32/sched-cleanup-select_task_rq_fair.patch new file mode 100644 index 00000000000..077246a7d8c --- /dev/null +++ b/queue-2.6.32/sched-cleanup-select_task_rq_fair.patch @@ -0,0 +1,122 @@ +From a.p.zijlstra@chello.nl Fri Sep 17 18:19:30 2010 +From: Peter Zijlstra +Date: Thu, 12 Nov 2009 15:55:28 +0100 +Subject: sched: Cleanup select_task_rq_fair() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <80263dd5bd5a2069a3907f0408ab2f73377f0b8a.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit a50bde5130f65733142b32975616427d0ea50856 upstream + +Clean up the new affine to idle sibling bits while trying to +grok them. Should not have any function differences. + +Signed-off-by: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091112145610.832503781@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched_fair.c | 73 ++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 51 insertions(+), 22 deletions(-) + +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1384,6 +1384,41 @@ find_idlest_cpu(struct sched_group *grou + } + + /* ++ * Try and locate an idle CPU in the sched_domain. ++ */ ++static int ++select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) ++{ ++ int cpu = smp_processor_id(); ++ int prev_cpu = task_cpu(p); ++ int i; ++ ++ /* ++ * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE ++ * test in select_task_rq_fair) and the prev_cpu is idle then that's ++ * always a better target than the current cpu. ++ */ ++ if (target == cpu) { ++ if (!cpu_rq(prev_cpu)->cfs.nr_running) ++ target = prev_cpu; ++ } ++ ++ /* ++ * Otherwise, iterate the domain and find an elegible idle cpu. ++ */ ++ if (target == -1 || target == cpu) { ++ for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { ++ if (!cpu_rq(i)->cfs.nr_running) { ++ target = i; ++ break; ++ } ++ } ++ } ++ ++ return target; ++} ++ ++/* + * sched_balance_self: balance the current task (running on cpu) in domains + * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and + * SD_BALANCE_EXEC. +@@ -1441,36 +1476,30 @@ select_task_rq_fair(struct rq *rq, struc + } + + if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) { +- int candidate = -1, i; ++ int target = -1; + ++ /* ++ * If both cpu and prev_cpu are part of this domain, ++ * cpu is a valid SD_WAKE_AFFINE target. ++ */ + if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) +- candidate = cpu; ++ target = cpu; + + /* +- * Check for an idle shared cache. ++ * If there's an idle sibling in this domain, make that ++ * the wake_affine target instead of the current cpu. 
++ * ++ * XXX: should we possibly do this outside of ++ * WAKE_AFFINE, in case the shared cache domain is ++ * smaller than the WAKE_AFFINE domain? + */ +- if (tmp->flags & SD_PREFER_SIBLING) { +- if (candidate == cpu) { +- if (!cpu_rq(prev_cpu)->cfs.nr_running) +- candidate = prev_cpu; +- } +- +- if (candidate == -1 || candidate == cpu) { +- for_each_cpu(i, sched_domain_span(tmp)) { +- if (!cpumask_test_cpu(i, &p->cpus_allowed)) +- continue; +- if (!cpu_rq(i)->cfs.nr_running) { +- candidate = i; +- break; +- } +- } +- } +- } ++ if (tmp->flags & SD_PREFER_SIBLING) ++ target = select_idle_sibling(p, tmp, target); + +- if (candidate >= 0) { ++ if (target >= 0) { + affine_sd = tmp; + want_affine = 0; +- cpu = candidate; ++ cpu = target; + } + } + diff --git a/queue-2.6.32/sched-cpuacct-use-bigger-percpu-counter-batch-values-for-stats-counters.patch b/queue-2.6.32/sched-cpuacct-use-bigger-percpu-counter-batch-values-for-stats-counters.patch new file mode 100644 index 00000000000..097a9d35d1f --- /dev/null +++ b/queue-2.6.32/sched-cpuacct-use-bigger-percpu-counter-batch-values-for-stats-counters.patch @@ -0,0 +1,110 @@ +From anton@samba.org Fri Sep 17 18:20:49 2010 +From: Anton Blanchard +Date: Tue, 2 Feb 2010 14:46:13 -0800 +Subject: sched: cpuacct: Use bigger percpu counter batch values for stats counters +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <096b1867bf2f9b6a3fc6c4ed114a02c181d3d77e.1283514307.git.efault@gmx.de> + +From: Anton Blanchard + +commit fa535a77bd3fa32b9215ba375d6a202fe73e1dd6 upstream + +When CONFIG_VIRT_CPU_ACCOUNTING and CONFIG_CGROUP_CPUACCT are +enabled we can call cpuacct_update_stats with values much larger +than percpu_counter_batch. This means the call to +percpu_counter_add will always add to the global count which is +protected by a spinlock and we end up with a global spinlock in +the scheduler. + +Based on an idea by KOSAKI Motohiro, this patch scales the batch +value by cputime_one_jiffy such that we have the same batch +limit as we would if CONFIG_VIRT_CPU_ACCOUNTING was disabled. +His patch did this once at boot but that initialisation happened +too early on PowerPC (before time_init) and it was never updated +at runtime as a result of a hotplug cpu add/remove. + +This patch instead scales percpu_counter_batch by +cputime_one_jiffy at runtime, which keeps the batch correct even +after cpu hotplug operations. We cap it at INT_MAX in case of +overflow. + +For architectures that do not support +CONFIG_VIRT_CPU_ACCOUNTING, cputime_one_jiffy is the constant 1 +and gcc is smart enough to optimise min(s32 +percpu_counter_batch, INT_MAX) to just percpu_counter_batch at +least on x86 and PowerPC. So there is no need to add an #ifdef. 
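As a rough illustration of the batching behaviour this relies on (a simplified userspace model with made-up numbers, not lib/percpu_counter.c itself): each CPU accumulates deltas in a small local counter and only folds them into the shared, lock-protected global count once the local value reaches the batch threshold, so scaling the batch by cputime_one_jiffy keeps the shared lock off the hot path.

/*
 * Simplified model of __percpu_counter_add() batching -- illustration
 * only, values and names below are invented for the example.
 */
#include <stdio.h>

#define NR_CPUS 4

struct pcpu_counter {
	long long count;	/* global count; lock-protected in the kernel */
	long local[NR_CPUS];	/* per-cpu deltas (s32 in the kernel) */
	long folds;		/* how often the shared-lock path was taken */
};

static void pcpu_add(struct pcpu_counter *c, int cpu, long amount, long batch)
{
	long v = c->local[cpu] + amount;

	if (v >= batch || v <= -batch) {
		/* kernel: spin_lock(&fbc->lock), fold, zero the per-cpu slot */
		c->count += v;
		c->local[cpu] = 0;
		c->folds++;
	} else {
		c->local[cpu] = v;
	}
}

int main(void)
{
	struct pcpu_counter c = { 0 };
	long cputime_one_jiffy = 10000;	/* large when VIRT_CPU_ACCOUNTING=y */
	long batch = 32;		/* stand-in for percpu_counter_batch */
	int i;

	/* default batch: every large delta overflows it and takes the lock */
	for (i = 0; i < 1000; i++)
		pcpu_add(&c, i % NR_CPUS, cputime_one_jiffy, batch);
	printf("batch=%ld: %ld lock acquisitions\n", batch, c.folds);

	/* batch scaled by cputime_one_jiffy, as the patch does */
	c.folds = 0;
	for (i = 0; i < 1000; i++)
		pcpu_add(&c, i % NR_CPUS, cputime_one_jiffy,
			 batch * cputime_one_jiffy);
	printf("batch=%ld: %ld lock acquisitions\n",
	       batch * cputime_one_jiffy, c.folds);
	return 0;
}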
+ +On a 64 thread PowerPC box with CONFIG_VIRT_CPU_ACCOUNTING and +CONFIG_CGROUP_CPUACCT enabled, a context switch microbenchmark +is 234x faster and almost matches a CONFIG_CGROUP_CPUACCT +disabled kernel: + + CONFIG_CGROUP_CPUACCT disabled: 16906698 ctx switches/sec + CONFIG_CGROUP_CPUACCT enabled: 61720 ctx switches/sec + CONFIG_CGROUP_CPUACCT + patch: 16663217 ctx switches/sec + +Tested with: + + wget http://ozlabs.org/~anton/junkcode/context_switch.c + make context_switch + for i in `seq 0 63`; do taskset -c $i ./context_switch & done + vmstat 1 + +Signed-off-by: Anton Blanchard +Reviewed-by: KOSAKI Motohiro +Acked-by: Balbir Singh +Tested-by: Balbir Singh +Cc: Peter Zijlstra +Cc: Martin Schwidefsky +Cc: "Luck, Tony" +Signed-off-by: Andrew Morton +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -10934,12 +10934,30 @@ static void cpuacct_charge(struct task_s + } + + /* ++ * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large ++ * in cputime_t units. As a result, cpuacct_update_stats calls ++ * percpu_counter_add with values large enough to always overflow the ++ * per cpu batch limit causing bad SMP scalability. ++ * ++ * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we ++ * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled ++ * and enabled. We cap it at INT_MAX which is the largest allowed batch value. ++ */ ++#ifdef CONFIG_SMP ++#define CPUACCT_BATCH \ ++ min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX) ++#else ++#define CPUACCT_BATCH 0 ++#endif ++ ++/* + * Charge the system/user time to the task's accounting group. + */ + static void cpuacct_update_stats(struct task_struct *tsk, + enum cpuacct_stat_index idx, cputime_t val) + { + struct cpuacct *ca; ++ int batch = CPUACCT_BATCH; + + if (unlikely(!cpuacct_subsys.active)) + return; +@@ -10948,7 +10966,7 @@ static void cpuacct_update_stats(struct + ca = task_ca(tsk); + + do { +- percpu_counter_add(&ca->cpustat[idx], val); ++ __percpu_counter_add(&ca->cpustat[idx], val, batch); + ca = ca->parent; + } while (ca); + rcu_read_unlock(); diff --git a/queue-2.6.32/sched-extend-enqueue_task-to-allow-head-queueing.patch b/queue-2.6.32/sched-extend-enqueue_task-to-allow-head-queueing.patch new file mode 100644 index 00000000000..b3e8945ee47 --- /dev/null +++ b/queue-2.6.32/sched-extend-enqueue_task-to-allow-head-queueing.patch @@ -0,0 +1,122 @@ +From tglx@linutronix.de Fri Sep 17 18:13:56 2010 +From: Thomas Gleixner +Date: Wed, 20 Jan 2010 20:58:57 +0000 +Subject: sched: Extend enqueue_task to allow head queueing +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Thomas Gleixner + +commit ea87bb7853168434f4a82426dd1ea8421f9e604d upstream + +The ability of enqueueing a task to the head of a SCHED_FIFO priority +list is required to fix some violations of POSIX scheduling policy. + +Extend the related functions with a "head" argument. 
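For context on the POSIX requirement mentioned above (a toy userspace model; the task names and the array-based list are invented for illustration): SCHED_FIFO keeps a FIFO list per priority, and POSIX requires a task preempted by a higher-priority task to resume at the head of its list, while a task that yields or newly becomes runnable goes to the tail. The kernel side of this distinction is the list_add()/list_add_tail() choice added by the sched_rt head-queueing patch later in this series.

/*
 * Toy model of a single SCHED_FIFO priority list, showing what the new
 * "head" argument is for.  Illustration only -- not scheduler code.
 */
#include <stdio.h>
#include <string.h>

#define MAX_TASKS 8

static const char *prio_list[MAX_TASKS];
static int nr_tasks;

/* head=1 behaves like list_add(), head=0 like list_add_tail() */
static void enqueue(const char *name, int head)
{
	if (nr_tasks == MAX_TASKS)
		return;
	if (head) {
		memmove(&prio_list[1], &prio_list[0],
			nr_tasks * sizeof(prio_list[0]));
		prio_list[0] = name;
	} else {
		prio_list[nr_tasks] = name;
	}
	nr_tasks++;
}

static void show(const char *when)
{
	int i;

	printf("%-20s:", when);
	for (i = 0; i < nr_tasks; i++)
		printf(" %s", prio_list[i]);
	printf("\n");
}

int main(void)
{
	/* B and C are runnable at this priority; A is currently running. */
	enqueue("B", 0);
	enqueue("C", 0);
	show("runnable");		/* B C */

	/*
	 * A is preempted by a higher-priority task.  POSIX says A must
	 * resume before B and C, so it is queued with head=1.  Had A
	 * yielded or woken up instead, it would go to the tail (head=0).
	 */
	enqueue("A", 1);
	show("A preempted, head=1");	/* A B C */
	return 0;
}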
+ +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra +Tested-by: Carsten Emde +Tested-by: Mathias Weber +LKML-Reference: <20100120171629.734886007@linutronix.de> +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 3 ++- + kernel/sched.c | 13 +++++++------ + kernel/sched_fair.c | 3 ++- + kernel/sched_rt.c | 3 ++- + 4 files changed, 13 insertions(+), 9 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1071,7 +1071,8 @@ struct sched_domain; + struct sched_class { + const struct sched_class *next; + +- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); ++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup, ++ bool head); + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); + void (*yield_task) (struct rq *rq); + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -1903,13 +1903,14 @@ static void update_avg(u64 *avg, u64 sam + *avg += diff >> 3; + } + +-static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) ++static void ++enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head) + { + if (wakeup) + p->se.start_runtime = p->se.sum_exec_runtime; + + sched_info_queued(p); +- p->sched_class->enqueue_task(rq, p, wakeup); ++ p->sched_class->enqueue_task(rq, p, wakeup, head); + p->se.on_rq = 1; + } + +@@ -1985,7 +1986,7 @@ static void activate_task(struct rq *rq, + if (task_contributes_to_load(p)) + rq->nr_uninterruptible--; + +- enqueue_task(rq, p, wakeup); ++ enqueue_task(rq, p, wakeup, false); + inc_nr_running(rq); + } + +@@ -6183,7 +6184,7 @@ void rt_mutex_setprio(struct task_struct + if (running) + p->sched_class->set_curr_task(rq); + if (on_rq) { +- enqueue_task(rq, p, 0); ++ enqueue_task(rq, p, 0, false); + + check_class_changed(rq, p, prev_class, oldprio, running); + } +@@ -6227,7 +6228,7 @@ void set_user_nice(struct task_struct *p + delta = p->prio - old_prio; + + if (on_rq) { +- enqueue_task(rq, p, 0); ++ enqueue_task(rq, p, 0, false); + /* + * If the task increased its priority or is running and + * lowered its priority, then reschedule its CPU: +@@ -10180,7 +10181,7 @@ void sched_move_task(struct task_struct + if (unlikely(running)) + tsk->sched_class->set_curr_task(rq); + if (on_rq) +- enqueue_task(rq, tsk, 0); ++ enqueue_task(rq, tsk, 0, false); + + task_rq_unlock(rq, &flags); + } +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1031,7 +1031,8 @@ static inline void hrtick_update(struct + * increased. 
Here we update the fair scheduling stats and + * then put the task into the rbtree: + */ +-static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) ++static void ++enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head) + { + struct cfs_rq *cfs_rq; + struct sched_entity *se = &p->se; +--- a/kernel/sched_rt.c ++++ b/kernel/sched_rt.c +@@ -878,7 +878,8 @@ static void dequeue_rt_entity(struct sch + /* + * Adding/removing a task to/from a priority array: + */ +-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) ++static void ++enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head) + { + struct sched_rt_entity *rt_se = &p->rt; + diff --git a/queue-2.6.32/sched-fix-incorrect-sanity-check.patch b/queue-2.6.32/sched-fix-incorrect-sanity-check.patch new file mode 100644 index 00000000000..201e69cb766 --- /dev/null +++ b/queue-2.6.32/sched-fix-incorrect-sanity-check.patch @@ -0,0 +1,36 @@ +From peterz@infradead.org Fri Sep 17 18:13:26 2010 +From: Peter Zijlstra +Date: Thu, 21 Jan 2010 16:34:27 +0100 +Subject: sched: Fix incorrect sanity check +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <550df2da0c2d00162a463923644fd024de95b890.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 11854247e2c851e7ff9ce138e501c6cffc5a4217 upstream + +We moved to migrate on wakeup, which means that sleeping tasks could +still be present on offline cpus. Amend the check to only test running +tasks. + +Reported-by: Heiko Carstens +Signed-off-by: Peter Zijlstra +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -151,7 +151,7 @@ static inline void check_for_tasks(int c + + write_lock_irq(&tasklist_lock); + for_each_process(p) { +- if (task_cpu(p) == cpu && ++ if (task_cpu(p) == cpu && p->state == TASK_RUNNING && + (!cputime_eq(p->utime, cputime_zero) || + !cputime_eq(p->stime, cputime_zero))) + printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\ diff --git a/queue-2.6.32/sched-fix-nr_uninterruptible-count.patch b/queue-2.6.32/sched-fix-nr_uninterruptible-count.patch new file mode 100644 index 00000000000..41d9c1fe56b --- /dev/null +++ b/queue-2.6.32/sched-fix-nr_uninterruptible-count.patch @@ -0,0 +1,46 @@ +From a.p.zijlstra@chello.nl Fri Sep 17 18:18:32 2010 +From: Peter Zijlstra +Date: Fri, 26 Mar 2010 12:22:14 +0100 +Subject: sched: Fix nr_uninterruptible count +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Peter Zijlstra + +commit cc87f76a601d2d256118f7bab15e35254356ae21 upstream + +The cpuload calculation in calc_load_account_active() assumes +rq->nr_uninterruptible will not change on an offline cpu after +migrate_nr_uninterruptible(). However the recent migrate on wakeup +changes broke that and would result in decrementing the offline cpu's +rq->nr_uninterruptible. + +Fix this by accounting the nr_uninterruptible on the waking cpu. 
+ +Signed-off-by: Peter Zijlstra +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2435,8 +2435,12 @@ static int try_to_wake_up(struct task_st + * + * First fix up the nr_uninterruptible count: + */ +- if (task_contributes_to_load(p)) +- rq->nr_uninterruptible--; ++ if (task_contributes_to_load(p)) { ++ if (likely(cpu_online(orig_cpu))) ++ rq->nr_uninterruptible--; ++ else ++ this_rq()->nr_uninterruptible--; ++ } + p->state = TASK_WAKING; + + if (p->sched_class->task_waking) diff --git a/queue-2.6.32/sched-fix-race-between-ttwu-and-task_rq_lock.patch b/queue-2.6.32/sched-fix-race-between-ttwu-and-task_rq_lock.patch new file mode 100644 index 00000000000..ddb89fde825 --- /dev/null +++ b/queue-2.6.32/sched-fix-race-between-ttwu-and-task_rq_lock.patch @@ -0,0 +1,153 @@ +From a.p.zijlstra@chello.nl Fri Sep 17 18:13:39 2010 +From: Peter Zijlstra +Date: Mon, 15 Feb 2010 14:45:54 +0100 +Subject: sched: Fix race between ttwu() and task_rq_lock() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <80faa6f269f4bd7825aec22056bbca743b5bd100.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 0970d2992dfd7d5ec2c787417cf464f01eeaf42a upstream + +Thomas found that due to ttwu() changing a task's cpu without holding +the rq->lock, task_rq_lock() might end up locking the wrong rq. + +Avoid this by serializing against TASK_WAKING. + +Reported-by: Thomas Gleixner +Signed-off-by: Peter Zijlstra +LKML-Reference: <1266241712.15770.420.camel@laptop> +Signed-off-by: Thomas Gleixner +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 71 ++++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 45 insertions(+), 26 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -942,16 +942,33 @@ static inline void finish_lock_switch(st + #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ + + /* ++ * Check whether the task is waking, we use this to synchronize against ++ * ttwu() so that task_cpu() reports a stable number. ++ * ++ * We need to make an exception for PF_STARTING tasks because the fork ++ * path might require task_rq_lock() to work, eg. it can call ++ * set_cpus_allowed_ptr() from the cpuset clone_ns code. ++ */ ++static inline int task_is_waking(struct task_struct *p) ++{ ++ return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING)); ++} ++ ++/* + * __task_rq_lock - lock the runqueue a given task resides on. + * Must be called interrupts disabled. 
+ */ + static inline struct rq *__task_rq_lock(struct task_struct *p) + __acquires(rq->lock) + { ++ struct rq *rq; ++ + for (;;) { +- struct rq *rq = task_rq(p); ++ while (task_is_waking(p)) ++ cpu_relax(); ++ rq = task_rq(p); + spin_lock(&rq->lock); +- if (likely(rq == task_rq(p))) ++ if (likely(rq == task_rq(p) && !task_is_waking(p))) + return rq; + spin_unlock(&rq->lock); + } +@@ -968,10 +985,12 @@ static struct rq *task_rq_lock(struct ta + struct rq *rq; + + for (;;) { ++ while (task_is_waking(p)) ++ cpu_relax(); + local_irq_save(*flags); + rq = task_rq(p); + spin_lock(&rq->lock); +- if (likely(rq == task_rq(p))) ++ if (likely(rq == task_rq(p) && !task_is_waking(p))) + return rq; + spin_unlock_irqrestore(&rq->lock, *flags); + } +@@ -2439,14 +2458,27 @@ static int try_to_wake_up(struct task_st + __task_rq_unlock(rq); + + cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); +- if (cpu != orig_cpu) ++ if (cpu != orig_cpu) { ++ /* ++ * Since we migrate the task without holding any rq->lock, ++ * we need to be careful with task_rq_lock(), since that ++ * might end up locking an invalid rq. ++ */ + set_task_cpu(p, cpu); ++ } + +- rq = __task_rq_lock(p); ++ rq = cpu_rq(cpu); ++ spin_lock(&rq->lock); + update_rq_clock(rq); + ++ /* ++ * We migrated the task without holding either rq->lock, however ++ * since the task is not on the task list itself, nobody else ++ * will try and migrate the task, hence the rq should match the ++ * cpu we just moved it to. ++ */ ++ WARN_ON(task_cpu(p) != cpu); + WARN_ON(p->state != TASK_WAKING); +- cpu = task_cpu(p); + + #ifdef CONFIG_SCHEDSTATS + schedstat_inc(rq, ttwu_count); +@@ -2695,7 +2727,13 @@ void wake_up_new_task(struct task_struct + set_task_cpu(p, cpu); + #endif + +- rq = task_rq_lock(p, &flags); ++ /* ++ * Since the task is not on the rq and we still have TASK_WAKING set ++ * nobody else will migrate this task. ++ */ ++ rq = cpu_rq(cpu); ++ spin_lock_irqsave(&rq->lock, flags); ++ + BUG_ON(p->state != TASK_WAKING); + p->state = TASK_RUNNING; + update_rq_clock(rq); +@@ -7204,27 +7242,8 @@ int set_cpus_allowed_ptr(struct task_str + struct rq *rq; + int ret = 0; + +- /* +- * Since we rely on wake-ups to migrate sleeping tasks, don't change +- * the ->cpus_allowed mask from under waking tasks, which would be +- * possible when we change rq->lock in ttwu(), so synchronize against +- * TASK_WAKING to avoid that. +- * +- * Make an exception for freshly cloned tasks, since cpuset namespaces +- * might move the task about, we have to validate the target in +- * wake_up_new_task() anyway since the cpu might have gone away. 
+- */ +-again: +- while (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) +- cpu_relax(); +- + rq = task_rq_lock(p, &flags); + +- if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) { +- task_rq_unlock(rq, &flags); +- goto again; +- } +- + if (!cpumask_intersects(new_mask, cpu_active_mask)) { + ret = -EINVAL; + goto out; diff --git a/queue-2.6.32/sched-fix-rq-clock-synchronization-when-migrating-tasks.patch b/queue-2.6.32/sched-fix-rq-clock-synchronization-when-migrating-tasks.patch new file mode 100644 index 00000000000..e21b05de792 --- /dev/null +++ b/queue-2.6.32/sched-fix-rq-clock-synchronization-when-migrating-tasks.patch @@ -0,0 +1,44 @@ +From peterz@infradead.org Fri Sep 17 18:18:47 2010 +From: Peter Zijlstra +Date: Thu, 19 Aug 2010 13:31:43 +0200 +Subject: sched: Fix rq->clock synchronization when migrating tasks +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <748cfa7664c3c3092de1cf8c86f96474f840bed6.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 861d034ee814917a83bd5de4b26e3b8336ddeeb8 upstream + +sched_fork() -- we do task placement in ->task_fork_fair() ensure we + update_rq_clock() so we work with current time. We leave the vruntime + in relative state, so the time delay until wake_up_new_task() doesn't + matter. + +wake_up_new_task() -- Since task_fork_fair() left p->vruntime in + relative state we can safely migrate, the activate_task() on the + remote rq will call update_rq_clock() and causes the clock to be + synced (enough). + +Tested-by: Jack Daniel +Tested-by: Philby John +Signed-off-by: Peter Zijlstra +LKML-Reference: <1281002322.1923.1708.camel@laptop> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched_fair.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1963,6 +1963,8 @@ static void task_fork_fair(struct task_s + + spin_lock_irqsave(&rq->lock, flags); + ++ update_rq_clock(rq); ++ + if (unlikely(task_cpu(p) != this_cpu)) + __set_task_cpu(p, this_cpu); + diff --git a/queue-2.6.32/sched-fix-select_idle_sibling-logic-in-select_task_rq_fair.patch b/queue-2.6.32/sched-fix-select_idle_sibling-logic-in-select_task_rq_fair.patch new file mode 100644 index 00000000000..08651a2596e --- /dev/null +++ b/queue-2.6.32/sched-fix-select_idle_sibling-logic-in-select_task_rq_fair.patch @@ -0,0 +1,174 @@ +From suresh.b.siddha@intel.com Fri Sep 17 18:20:36 2010 +From: Suresh Siddha +Date: Wed, 31 Mar 2010 16:47:45 -0700 +Subject: sched: Fix select_idle_sibling() logic in select_task_rq_fair() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <7c9917f68be3e57e65b938ff15cc6a2b1cc0da16.1283514307.git.efault@gmx.de> + +From: Suresh Siddha + +commit 99bd5e2f245d8cd17d040c82d40becdb3efd9b69 upstream + +Issues in the current select_idle_sibling() logic in select_task_rq_fair() +in the context of a task wake-up: + +a) Once we select the idle sibling, we use that domain (spanning the cpu that + the task is currently woken-up and the idle sibling that we found) in our + wake_affine() decisions. This domain is completely different from the + domain(we are supposed to use) that spans the cpu that the task currently + woken-up and the cpu where the task previously ran. + +b) We do select_idle_sibling() check only for the cpu that the task is + currently woken-up on. 
If select_task_rq_fair() selects the previously run + cpu for waking the task, doing a select_idle_sibling() check + for that cpu also helps and we don't do this currently. + +c) In the scenarios where the cpu that the task is woken-up is busy but + with its HT siblings are idle, we are selecting the task be woken-up + on the idle HT sibling instead of a core that it previously ran + and currently completely idle. i.e., we are not taking decisions based on + wake_affine() but directly selecting an idle sibling that can cause + an imbalance at the SMT/MC level which will be later corrected by the + periodic load balancer. + +Fix this by first going through the load imbalance calculations using +wake_affine() and once we make a decision of woken-up cpu vs previously-ran cpu, +then choose a possible idle sibling for waking up the task on. + +Signed-off-by: Suresh Siddha +Signed-off-by: Peter Zijlstra +LKML-Reference: <1270079265.7835.8.camel@sbs-t61.sc.intel.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched_fair.c | 82 +++++++++++++++++++++++++--------------------------- + 1 file changed, 40 insertions(+), 42 deletions(-) + +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1386,29 +1386,48 @@ find_idlest_cpu(struct sched_group *grou + /* + * Try and locate an idle CPU in the sched_domain. + */ +-static int +-select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) ++static int select_idle_sibling(struct task_struct *p, int target) + { + int cpu = smp_processor_id(); + int prev_cpu = task_cpu(p); ++ struct sched_domain *sd; + int i; + + /* +- * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE +- * test in select_task_rq_fair) and the prev_cpu is idle then that's +- * always a better target than the current cpu. ++ * If the task is going to be woken-up on this cpu and if it is ++ * already idle, then it is the right target. ++ */ ++ if (target == cpu && idle_cpu(cpu)) ++ return cpu; ++ ++ /* ++ * If the task is going to be woken-up on the cpu where it previously ++ * ran and if it is currently idle, then it the right target. + */ +- if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running) ++ if (target == prev_cpu && idle_cpu(prev_cpu)) + return prev_cpu; + + /* +- * Otherwise, iterate the domain and find an elegible idle cpu. ++ * Otherwise, iterate the domains and find an elegible idle cpu. + */ +- for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { +- if (!cpu_rq(i)->cfs.nr_running) { +- target = i; ++ for_each_domain(target, sd) { ++ if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) + break; ++ ++ for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { ++ if (idle_cpu(i)) { ++ target = i; ++ break; ++ } + } ++ ++ /* ++ * Lets stop looking for an idle sibling when we reached ++ * the domain that spans the current cpu and prev_cpu. 
++ */ ++ if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && ++ cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) ++ break; + } + + return target; +@@ -1432,7 +1451,7 @@ select_task_rq_fair(struct rq *rq, struc + int cpu = smp_processor_id(); + int prev_cpu = task_cpu(p); + int new_cpu = cpu; +- int want_affine = 0, cpu_idle = !current->pid; ++ int want_affine = 0; + int want_sd = 1; + int sync = wake_flags & WF_SYNC; + +@@ -1472,36 +1491,13 @@ select_task_rq_fair(struct rq *rq, struc + } + + /* +- * While iterating the domains looking for a spanning +- * WAKE_AFFINE domain, adjust the affine target to any idle cpu +- * in cache sharing domains along the way. ++ * If both cpu and prev_cpu are part of this domain, ++ * cpu is a valid SD_WAKE_AFFINE target. + */ +- if (want_affine) { +- int target = -1; +- +- /* +- * If both cpu and prev_cpu are part of this domain, +- * cpu is a valid SD_WAKE_AFFINE target. +- */ +- if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) +- target = cpu; +- +- /* +- * If there's an idle sibling in this domain, make that +- * the wake_affine target instead of the current cpu. +- */ +- if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES) +- target = select_idle_sibling(p, tmp, target); +- +- if (target >= 0) { +- if (tmp->flags & SD_WAKE_AFFINE) { +- affine_sd = tmp; +- want_affine = 0; +- if (target != cpu) +- cpu_idle = 1; +- } +- cpu = target; +- } ++ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && ++ cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { ++ affine_sd = tmp; ++ want_affine = 0; + } + + if (!want_sd && !want_affine) +@@ -1532,8 +1528,10 @@ select_task_rq_fair(struct rq *rq, struc + #endif + + if (affine_sd) { +- if (cpu_idle || cpu == prev_cpu || wake_affine(affine_sd, p, sync)) +- return cpu; ++ if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) ++ return select_idle_sibling(p, cpu); ++ else ++ return select_idle_sibling(p, prev_cpu); + } + + while (sd) { diff --git a/queue-2.6.32/sched-fix-select_idle_sibling.patch b/queue-2.6.32/sched-fix-select_idle_sibling.patch new file mode 100644 index 00000000000..79629d17e6a --- /dev/null +++ b/queue-2.6.32/sched-fix-select_idle_sibling.patch @@ -0,0 +1,76 @@ +From efault@gmx.de Fri Sep 17 18:20:11 2010 +From: Mike Galbraith +Date: Thu, 11 Mar 2010 17:17:16 +0100 +Subject: sched: Fix select_idle_sibling() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <2dc48f18ab671dc1c87c87dba674ff4b755d17ff.1283514307.git.efault@gmx.de> + +From: Mike Galbraith + +commit 8b911acdf08477c059d1c36c21113ab1696c612b upstream + +Don't bother with selection when the current cpu is idle. Recent load +balancing changes also make it no longer necessary to check wake_affine() +success before returning the selected sibling, so we now always use it. 
+ +Signed-off-by: Mike Galbraith +Signed-off-by: Peter Zijlstra +LKML-Reference: <1268301369.6785.36.camel@marge.simson.net> +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched_fair.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1432,7 +1432,7 @@ select_task_rq_fair(struct rq *rq, struc + int cpu = smp_processor_id(); + int prev_cpu = task_cpu(p); + int new_cpu = cpu; +- int want_affine = 0; ++ int want_affine = 0, cpu_idle = !current->pid; + int want_sd = 1; + int sync = wake_flags & WF_SYNC; + +@@ -1490,13 +1490,15 @@ select_task_rq_fair(struct rq *rq, struc + * If there's an idle sibling in this domain, make that + * the wake_affine target instead of the current cpu. + */ +- if (tmp->flags & SD_SHARE_PKG_RESOURCES) ++ if (!cpu_idle && tmp->flags & SD_SHARE_PKG_RESOURCES) + target = select_idle_sibling(p, tmp, target); + + if (target >= 0) { + if (tmp->flags & SD_WAKE_AFFINE) { + affine_sd = tmp; + want_affine = 0; ++ if (target != cpu) ++ cpu_idle = 1; + } + cpu = target; + } +@@ -1512,6 +1514,7 @@ select_task_rq_fair(struct rq *rq, struc + sd = tmp; + } + ++#ifdef CONFIG_FAIR_GROUP_SCHED + if (sched_feat(LB_SHARES_UPDATE)) { + /* + * Pick the largest domain to update shares over +@@ -1528,9 +1531,12 @@ select_task_rq_fair(struct rq *rq, struc + spin_lock(&rq->lock); + } + } ++#endif + +- if (affine_sd && wake_affine(affine_sd, p, sync)) +- return cpu; ++ if (affine_sd) { ++ if (cpu_idle || cpu == prev_cpu || wake_affine(affine_sd, p, sync)) ++ return cpu; ++ } + + while (sd) { + int load_idx = sd->forkexec_idx; diff --git a/queue-2.6.32/sched-fix-task_waking-vs-fork-deadlock.patch b/queue-2.6.32/sched-fix-task_waking-vs-fork-deadlock.patch new file mode 100644 index 00000000000..fdd34c0878e --- /dev/null +++ b/queue-2.6.32/sched-fix-task_waking-vs-fork-deadlock.patch @@ -0,0 +1,246 @@ +From a.p.zijlstra@chello.nl Fri Sep 17 18:18:02 2010 +From: Peter Zijlstra +Date: Wed, 24 Mar 2010 18:34:10 +0100 +Subject: sched: Fix TASK_WAKING vs fork deadlock +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <1620f28b03b31be9190132c280a85fc1d08141a8.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 0017d735092844118bef006696a750a0e4ef6ebd upstream + +Oleg noticed a few races with the TASK_WAKING usage on fork. + + - since TASK_WAKING is basically a spinlock, it should be IRQ safe + - since we set TASK_WAKING (*) without holding rq->lock it could + be there still is a rq->lock holder, thereby not actually + providing full serialization. + +(*) in fact we clear PF_STARTING, which in effect enables TASK_WAKING. + +Cure the second issue by not setting TASK_WAKING in sched_fork(), but +only temporarily in wake_up_new_task() while calling select_task_rq(). + +Cure the first by holding rq->lock around the select_task_rq() call, +this will disable IRQs, this however requires that we push down the +rq->lock release into select_task_rq_fair()'s cgroup stuff. + +Because select_task_rq_fair() still needs to drop the rq->lock we +cannot fully get rid of TASK_WAKING. 
+ +Reported-by: Oleg Nesterov +Signed-off-by: Peter Zijlstra +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 3 +- + kernel/sched.c | 65 +++++++++++++++++------------------------------- + kernel/sched_fair.c | 8 ++++- + kernel/sched_idletask.c | 3 +- + kernel/sched_rt.c | 5 +-- + 5 files changed, 36 insertions(+), 48 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1082,7 +1082,8 @@ struct sched_class { + void (*put_prev_task) (struct rq *rq, struct task_struct *p); + + #ifdef CONFIG_SMP +- int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); ++ int (*select_task_rq)(struct rq *rq, struct task_struct *p, ++ int sd_flag, int flags); + + unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, + struct rq *busiest, unsigned long max_load_move, +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -944,14 +944,10 @@ static inline void finish_lock_switch(st + /* + * Check whether the task is waking, we use this to synchronize against + * ttwu() so that task_cpu() reports a stable number. +- * +- * We need to make an exception for PF_STARTING tasks because the fork +- * path might require task_rq_lock() to work, eg. it can call +- * set_cpus_allowed_ptr() from the cpuset clone_ns code. + */ + static inline int task_is_waking(struct task_struct *p) + { +- return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING)); ++ return unlikely(p->state == TASK_WAKING); + } + + /* +@@ -2373,9 +2369,9 @@ static int select_fallback_rq(int cpu, s + * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable. + */ + static inline +-int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) ++int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) + { +- int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); ++ int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); + + /* + * In order not to call set_task_cpu() on a blocking task we need +@@ -2450,17 +2446,10 @@ static int try_to_wake_up(struct task_st + if (p->sched_class->task_waking) + p->sched_class->task_waking(rq, p); + +- __task_rq_unlock(rq); +- +- cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); +- if (cpu != orig_cpu) { +- /* +- * Since we migrate the task without holding any rq->lock, +- * we need to be careful with task_rq_lock(), since that +- * might end up locking an invalid rq. +- */ ++ cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags); ++ if (cpu != orig_cpu) + set_task_cpu(p, cpu); +- } ++ __task_rq_unlock(rq); + + rq = cpu_rq(cpu); + spin_lock(&rq->lock); +@@ -2638,11 +2627,11 @@ void sched_fork(struct task_struct *p, i + + __sched_fork(p); + /* +- * We mark the process as waking here. This guarantees that ++ * We mark the process as running here. This guarantees that + * nobody will actually run it, and a signal or other external + * event cannot wake it up and insert it on the runqueue either. + */ +- p->state = TASK_WAKING; ++ p->state = TASK_RUNNING; + + /* + * Revert to default priority/policy on fork if requested. 
+@@ -2709,28 +2698,25 @@ void wake_up_new_task(struct task_struct + int cpu = get_cpu(); + + #ifdef CONFIG_SMP ++ rq = task_rq_lock(p, &flags); ++ p->state = TASK_WAKING; ++ + /* + * Fork balancing, do it here and not earlier because: + * - cpus_allowed can change in the fork path + * - any previously selected cpu might disappear through hotplug + * +- * We still have TASK_WAKING but PF_STARTING is gone now, meaning +- * ->cpus_allowed is stable, we have preemption disabled, meaning +- * cpu_online_mask is stable. ++ * We set TASK_WAKING so that select_task_rq() can drop rq->lock ++ * without people poking at ->cpus_allowed. + */ +- cpu = select_task_rq(p, SD_BALANCE_FORK, 0); ++ cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); + set_task_cpu(p, cpu); +-#endif +- +- /* +- * Since the task is not on the rq and we still have TASK_WAKING set +- * nobody else will migrate this task. +- */ +- rq = cpu_rq(cpu); +- spin_lock_irqsave(&rq->lock, flags); + +- BUG_ON(p->state != TASK_WAKING); + p->state = TASK_RUNNING; ++ task_rq_unlock(rq, &flags); ++#endif ++ ++ rq = task_rq_lock(p, &flags); + update_rq_clock(rq); + activate_task(rq, p, 0); + trace_sched_wakeup_new(rq, p, 1); +@@ -3215,19 +3201,15 @@ void sched_exec(void) + { + struct task_struct *p = current; + struct migration_req req; +- int dest_cpu, this_cpu; + unsigned long flags; + struct rq *rq; +- +- this_cpu = get_cpu(); +- dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); +- if (dest_cpu == this_cpu) { +- put_cpu(); +- return; +- } ++ int dest_cpu; + + rq = task_rq_lock(p, &flags); +- put_cpu(); ++ dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); ++ if (dest_cpu == smp_processor_id()) ++ goto unlock; ++ + /* + * select_task_rq() can race against ->cpus_allowed + */ +@@ -3245,6 +3227,7 @@ void sched_exec(void) + + return; + } ++unlock: + task_rq_unlock(rq, &flags); + } + +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1392,7 +1392,8 @@ find_idlest_cpu(struct sched_group *grou + * + * preempt must be disabled. 
+ */ +-static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) ++static int ++select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) + { + struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; + int cpu = smp_processor_id(); +@@ -1492,8 +1493,11 @@ static int select_task_rq_fair(struct ta + cpumask_weight(sched_domain_span(sd)))) + tmp = affine_sd; + +- if (tmp) ++ if (tmp) { ++ spin_unlock(&rq->lock); + update_shares(tmp); ++ spin_lock(&rq->lock); ++ } + } + + if (affine_sd && wake_affine(affine_sd, p, sync)) { +--- a/kernel/sched_idletask.c ++++ b/kernel/sched_idletask.c +@@ -6,7 +6,8 @@ + */ + + #ifdef CONFIG_SMP +-static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) ++static int ++select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags) + { + return task_cpu(p); /* IDLE tasks as never migrated */ + } +--- a/kernel/sched_rt.c ++++ b/kernel/sched_rt.c +@@ -942,10 +942,9 @@ static void yield_task_rt(struct rq *rq) + #ifdef CONFIG_SMP + static int find_lowest_rq(struct task_struct *task); + +-static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) ++static int ++select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) + { +- struct rq *rq = task_rq(p); +- + if (sd_flag != SD_BALANCE_WAKE) + return smp_processor_id(); + diff --git a/queue-2.6.32/sched-fix-vmark-regression-on-big-machines.patch b/queue-2.6.32/sched-fix-vmark-regression-on-big-machines.patch new file mode 100644 index 00000000000..6d0899ddff3 --- /dev/null +++ b/queue-2.6.32/sched-fix-vmark-regression-on-big-machines.patch @@ -0,0 +1,51 @@ +From efault@gmx.de Fri Sep 17 18:19:56 2010 +From: Mike Galbraith +Date: Mon, 4 Jan 2010 14:44:56 +0100 +Subject: sched: Fix vmark regression on big machines +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Mike Galbraith + +commit 50b926e439620c469565e8be0f28be78f5fca1ce upstream + +SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't +enabled, leading to many cache misses on large machines as we traverse +looking for an idle shared cache to wake to. Change the enabler of +select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the +sibling domain level. + +Reported-by: Lin Ming +Signed-off-by: Mike Galbraith +Signed-off-by: Peter Zijlstra +LKML-Reference: <1262612696.15495.15.camel@marge.simson.net> +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/topology.h | 2 +- + kernel/sched_fair.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/include/linux/topology.h ++++ b/include/linux/topology.h +@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void); + | 1*SD_WAKE_AFFINE \ + | 1*SD_SHARE_CPUPOWER \ + | 0*SD_POWERSAVINGS_BALANCE \ +- | 0*SD_SHARE_PKG_RESOURCES \ ++ | 1*SD_SHARE_PKG_RESOURCES \ + | 0*SD_SERIALIZE \ + | 0*SD_PREFER_SIBLING \ + , \ +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1490,7 +1490,7 @@ select_task_rq_fair(struct rq *rq, struc + * If there's an idle sibling in this domain, make that + * the wake_affine target instead of the current cpu. 
+ */ +- if (tmp->flags & SD_PREFER_SIBLING) ++ if (tmp->flags & SD_SHARE_PKG_RESOURCES) + target = select_idle_sibling(p, tmp, target); + + if (target >= 0) { diff --git a/queue-2.6.32/sched-implement-head-queueing-for-sched_rt.patch b/queue-2.6.32/sched-implement-head-queueing-for-sched_rt.patch new file mode 100644 index 00000000000..04f0a091d29 --- /dev/null +++ b/queue-2.6.32/sched-implement-head-queueing-for-sched_rt.patch @@ -0,0 +1,101 @@ +From tglx@linutronix.de Fri Sep 17 18:14:11 2010 +From: Thomas Gleixner +Date: Wed, 20 Jan 2010 20:59:01 +0000 +Subject: sched: Implement head queueing for sched_rt +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <06654220e9d17d06d30535777dfbcdf5ab2d7e57.1283514307.git.efault@gmx.de> + +From: Thomas Gleixner + +commit 37dad3fce97f01e5149d69de0833d8452c0e862e upstream + +The ability of enqueueing a task to the head of a SCHED_FIFO priority +list is required to fix some violations of POSIX scheduling policy. + +Implement the functionality in sched_rt. + +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra +Tested-by: Carsten Emde +Tested-by: Mathias Weber +LKML-Reference: <20100120171629.772169931@linutronix.de> +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched_rt.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +--- a/kernel/sched_rt.c ++++ b/kernel/sched_rt.c +@@ -194,7 +194,7 @@ static inline struct rt_rq *group_rt_rq( + return rt_se->my_q; + } + +-static void enqueue_rt_entity(struct sched_rt_entity *rt_se); ++static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head); + static void dequeue_rt_entity(struct sched_rt_entity *rt_se); + + static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) +@@ -204,7 +204,7 @@ static void sched_rt_rq_enqueue(struct r + + if (rt_rq->rt_nr_running) { + if (rt_se && !on_rt_rq(rt_se)) +- enqueue_rt_entity(rt_se); ++ enqueue_rt_entity(rt_se, false); + if (rt_rq->highest_prio.curr < curr->prio) + resched_task(curr); + } +@@ -803,7 +803,7 @@ void dec_rt_tasks(struct sched_rt_entity + dec_rt_group(rt_se, rt_rq); + } + +-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) ++static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) + { + struct rt_rq *rt_rq = rt_rq_of_se(rt_se); + struct rt_prio_array *array = &rt_rq->active; +@@ -819,7 +819,10 @@ static void __enqueue_rt_entity(struct s + if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) + return; + +- list_add_tail(&rt_se->run_list, queue); ++ if (head) ++ list_add(&rt_se->run_list, queue); ++ else ++ list_add_tail(&rt_se->run_list, queue); + __set_bit(rt_se_prio(rt_se), array->bitmap); + + inc_rt_tasks(rt_se, rt_rq); +@@ -856,11 +859,11 @@ static void dequeue_rt_stack(struct sche + } + } + +-static void enqueue_rt_entity(struct sched_rt_entity *rt_se) ++static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) + { + dequeue_rt_stack(rt_se); + for_each_sched_rt_entity(rt_se) +- __enqueue_rt_entity(rt_se); ++ __enqueue_rt_entity(rt_se, head); + } + + static void dequeue_rt_entity(struct sched_rt_entity *rt_se) +@@ -871,7 +874,7 @@ static void dequeue_rt_entity(struct sch + struct rt_rq *rt_rq = group_rt_rq(rt_se); + + if (rt_rq && rt_rq->rt_nr_running) +- __enqueue_rt_entity(rt_se); ++ __enqueue_rt_entity(rt_se, false); + } + } + +@@ -886,7 +889,7 @@ enqueue_task_rt(struct rq *rq, struct ta + if (wakeup) + rt_se->timeout = 0; + +- enqueue_rt_entity(rt_se); ++ enqueue_rt_entity(rt_se, head); + + if 
(!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) + enqueue_pushable_task(rq, p); diff --git a/queue-2.6.32/sched-kill-the-broken-and-deadlockable-cpuset_lock-cpuset_cpus_allowed_locked-code.patch b/queue-2.6.32/sched-kill-the-broken-and-deadlockable-cpuset_lock-cpuset_cpus_allowed_locked-code.patch new file mode 100644 index 00000000000..8c401077534 --- /dev/null +++ b/queue-2.6.32/sched-kill-the-broken-and-deadlockable-cpuset_lock-cpuset_cpus_allowed_locked-code.patch @@ -0,0 +1,171 @@ +From oleg@redhat.com Fri Sep 17 18:14:53 2010 +From: Oleg Nesterov +Date: Mon, 15 Mar 2010 10:10:03 +0100 +Subject: sched: Kill the broken and deadlockable cpuset_lock/cpuset_cpus_allowed_locked code +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <2ed3dbb00c3052ccb7ffda1e7a1d112e3d3f53f1.1283514307.git.efault@gmx.de> + +From: Oleg Nesterov + +commit 897f0b3c3ff40b443c84e271bef19bd6ae885195 upstream + +This patch just states the fact the cpusets/cpuhotplug interaction is +broken and removes the deadlockable code which only pretends to work. + +- cpuset_lock() doesn't really work. It is needed for + cpuset_cpus_allowed_locked() but we can't take this lock in + try_to_wake_up()->select_fallback_rq() path. + +- cpuset_lock() is deadlockable. Suppose that a task T bound to CPU takes + callback_mutex. If cpu_down(CPU) happens before T drops callback_mutex + stop_machine() preempts T, then migration_call(CPU_DEAD) tries to take + cpuset_lock() and hangs forever because CPU is already dead and thus + T can't be scheduled. + +- cpuset_cpus_allowed_locked() is deadlockable too. It takes task_lock() + which is not irq-safe, but try_to_wake_up() can be called from irq. + +Kill them, and change select_fallback_rq() to use cpu_possible_mask, like +we currently do without CONFIG_CPUSETS. + +Also, with or without this patch, with or without CONFIG_CPUSETS, the +callers of select_fallback_rq() can race with each other or with +set_cpus_allowed() pathes. + +The subsequent patches try to to fix these problems. 
+ +Signed-off-by: Oleg Nesterov +Signed-off-by: Peter Zijlstra +LKML-Reference: <20100315091003.GA9123@redhat.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/cpuset.h | 13 ------------- + kernel/cpuset.c | 27 +-------------------------- + kernel/sched.c | 10 +++------- + 3 files changed, 4 insertions(+), 46 deletions(-) + +--- a/include/linux/cpuset.h ++++ b/include/linux/cpuset.h +@@ -21,8 +21,6 @@ extern int number_of_cpusets; /* How man + extern int cpuset_init(void); + extern void cpuset_init_smp(void); + extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); +-extern void cpuset_cpus_allowed_locked(struct task_struct *p, +- struct cpumask *mask); + extern nodemask_t cpuset_mems_allowed(struct task_struct *p); + #define cpuset_current_mems_allowed (current->mems_allowed) + void cpuset_init_current_mems_allowed(void); +@@ -69,9 +67,6 @@ struct seq_file; + extern void cpuset_task_status_allowed(struct seq_file *m, + struct task_struct *task); + +-extern void cpuset_lock(void); +-extern void cpuset_unlock(void); +- + extern int cpuset_mem_spread_node(void); + + static inline int cpuset_do_page_mem_spread(void) +@@ -105,11 +100,6 @@ static inline void cpuset_cpus_allowed(s + { + cpumask_copy(mask, cpu_possible_mask); + } +-static inline void cpuset_cpus_allowed_locked(struct task_struct *p, +- struct cpumask *mask) +-{ +- cpumask_copy(mask, cpu_possible_mask); +-} + + static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) + { +@@ -157,9 +147,6 @@ static inline void cpuset_task_status_al + { + } + +-static inline void cpuset_lock(void) {} +-static inline void cpuset_unlock(void) {} +- + static inline int cpuset_mem_spread_node(void) + { + return 0; +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -2145,19 +2145,10 @@ void __init cpuset_init_smp(void) + void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) + { + mutex_lock(&callback_mutex); +- cpuset_cpus_allowed_locked(tsk, pmask); +- mutex_unlock(&callback_mutex); +-} +- +-/** +- * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. +- * Must be called with callback_mutex held. +- **/ +-void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask) +-{ + task_lock(tsk); + guarantee_online_cpus(task_cs(tsk), pmask); + task_unlock(tsk); ++ mutex_unlock(&callback_mutex); + } + + void cpuset_init_current_mems_allowed(void) +@@ -2346,22 +2337,6 @@ int __cpuset_node_allowed_hardwall(int n + } + + /** +- * cpuset_lock - lock out any changes to cpuset structures +- * +- * The out of memory (oom) code needs to mutex_lock cpusets +- * from being changed while it scans the tasklist looking for a +- * task in an overlapping cpuset. Expose callback_mutex via this +- * cpuset_lock() routine, so the oom code can lock it, before +- * locking the task list. The tasklist_lock is a spinlock, so +- * must be taken inside callback_mutex. +- */ +- +-void cpuset_lock(void) +-{ +- mutex_lock(&callback_mutex); +-} +- +-/** + * cpuset_unlock - release lock on cpuset changes + * + * Undo the lock taken in a previous cpuset_lock() call. +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2349,11 +2349,9 @@ static int select_fallback_rq(int cpu, s + return dest_cpu; + + /* No more Mr. Nice Guy. 
*/ +- if (dest_cpu >= nr_cpu_ids) { +- rcu_read_lock(); +- cpuset_cpus_allowed_locked(p, &p->cpus_allowed); +- rcu_read_unlock(); +- dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); ++ if (unlikely(dest_cpu >= nr_cpu_ids)) { ++ cpumask_copy(&p->cpus_allowed, cpu_possible_mask); ++ dest_cpu = cpumask_any(cpu_active_mask); + + /* + * Don't tell them about moving exiting tasks or +@@ -7833,7 +7831,6 @@ migration_call(struct notifier_block *nf + + case CPU_DEAD: + case CPU_DEAD_FROZEN: +- cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */ + migrate_live_tasks(cpu); + rq = cpu_rq(cpu); + /* Idle task back to normal (off runqueue, low prio) */ +@@ -7844,7 +7841,6 @@ migration_call(struct notifier_block *nf + rq->idle->sched_class = &idle_sched_class; + migrate_dead_tasks(cpu); + spin_unlock_irq(&rq->lock); +- cpuset_unlock(); + migrate_nr_uninterruptible(rq); + BUG_ON(rq->nr_running != 0); + calc_global_load_remove(rq); diff --git a/queue-2.6.32/sched-make-select_fallback_rq-cpuset-friendly.patch b/queue-2.6.32/sched-make-select_fallback_rq-cpuset-friendly.patch new file mode 100644 index 00000000000..833c7cc05c1 --- /dev/null +++ b/queue-2.6.32/sched-make-select_fallback_rq-cpuset-friendly.patch @@ -0,0 +1,123 @@ +From oleg@redhat.com Fri Sep 17 18:17:45 2010 +From: Oleg Nesterov +Date: Mon, 15 Mar 2010 10:10:27 +0100 +Subject: sched: Make select_fallback_rq() cpuset friendly +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Oleg Nesterov + +commit 9084bb8246ea935b98320554229e2f371f7f52fa upstream + +Introduce cpuset_cpus_allowed_fallback() helper to fix the cpuset problems +with select_fallback_rq(). It can be called from any context and can't use +any cpuset locks including task_lock(). It is called when the task doesn't +have online cpus in ->cpus_allowed but ttwu/etc must be able to find a +suitable cpu. + +I am not proud of this patch. Everything which needs such a fat comment +can't be good even if correct. But I'd prefer to not change the locking +rules in the code I hardly understand, and in any case I believe this +simple change make the code much more correct compared to deadlocks we +currently have. 
+ +Signed-off-by: Oleg Nesterov +Signed-off-by: Peter Zijlstra +LKML-Reference: <20100315091027.GA9155@redhat.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/cpuset.h | 7 +++++++ + kernel/cpuset.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + kernel/sched.c | 4 +--- + 3 files changed, 50 insertions(+), 3 deletions(-) + +--- a/include/linux/cpuset.h ++++ b/include/linux/cpuset.h +@@ -21,6 +21,7 @@ extern int number_of_cpusets; /* How man + extern int cpuset_init(void); + extern void cpuset_init_smp(void); + extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); ++extern int cpuset_cpus_allowed_fallback(struct task_struct *p); + extern nodemask_t cpuset_mems_allowed(struct task_struct *p); + #define cpuset_current_mems_allowed (current->mems_allowed) + void cpuset_init_current_mems_allowed(void); +@@ -101,6 +102,12 @@ static inline void cpuset_cpus_allowed(s + cpumask_copy(mask, cpu_possible_mask); + } + ++static inline int cpuset_cpus_allowed_fallback(struct task_struct *p) ++{ ++ cpumask_copy(&p->cpus_allowed, cpu_possible_mask); ++ return cpumask_any(cpu_active_mask); ++} ++ + static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) + { + return node_possible_map; +--- a/kernel/cpuset.c ++++ b/kernel/cpuset.c +@@ -2151,6 +2151,48 @@ void cpuset_cpus_allowed(struct task_str + mutex_unlock(&callback_mutex); + } + ++int cpuset_cpus_allowed_fallback(struct task_struct *tsk) ++{ ++ const struct cpuset *cs; ++ int cpu; ++ ++ rcu_read_lock(); ++ cs = task_cs(tsk); ++ if (cs) ++ cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); ++ rcu_read_unlock(); ++ ++ /* ++ * We own tsk->cpus_allowed, nobody can change it under us. ++ * ++ * But we used cs && cs->cpus_allowed lockless and thus can ++ * race with cgroup_attach_task() or update_cpumask() and get ++ * the wrong tsk->cpus_allowed. However, both cases imply the ++ * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() ++ * which takes task_rq_lock(). ++ * ++ * If we are called after it dropped the lock we must see all ++ * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary ++ * set any mask even if it is not right from task_cs() pov, ++ * the pending set_cpus_allowed_ptr() will fix things. ++ */ ++ ++ cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask); ++ if (cpu >= nr_cpu_ids) { ++ /* ++ * Either tsk->cpus_allowed is wrong (see above) or it ++ * is actually empty. The latter case is only possible ++ * if we are racing with remove_tasks_in_empty_cpuset(). ++ * Like above we can temporary set any mask and rely on ++ * set_cpus_allowed_ptr() as synchronization point. ++ */ ++ cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); ++ cpu = cpumask_any(cpu_active_mask); ++ } ++ ++ return cpu; ++} ++ + void cpuset_init_current_mems_allowed(void) + { + nodes_setall(current->mems_allowed); +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2353,9 +2353,7 @@ static int select_fallback_rq(int cpu, s + + /* No more Mr. Nice Guy. 
*/ + if (unlikely(dest_cpu >= nr_cpu_ids)) { +- cpumask_copy(&p->cpus_allowed, cpu_possible_mask); +- dest_cpu = cpumask_any(cpu_active_mask); +- ++ dest_cpu = cpuset_cpus_allowed_fallback(p); + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never diff --git a/queue-2.6.32/sched-more-generic-wake_affine-vs-select_idle_sibling.patch b/queue-2.6.32/sched-more-generic-wake_affine-vs-select_idle_sibling.patch new file mode 100644 index 00000000000..40b58b5d53f --- /dev/null +++ b/queue-2.6.32/sched-more-generic-wake_affine-vs-select_idle_sibling.patch @@ -0,0 +1,91 @@ +From a.p.zijlstra@chello.nl Fri Sep 17 18:19:43 2010 +From: Peter Zijlstra +Date: Thu, 12 Nov 2009 15:55:29 +0100 +Subject: sched: More generic WAKE_AFFINE vs select_idle_sibling() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <4fe736bd5f08977bf198f67dd272162a061c1a02.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit fe3bcfe1f6c1fc4ea7706ac2d05e579fd9092682 upstream + +Instead of only considering SD_WAKE_AFFINE | SD_PREFER_SIBLING +domains also allow all SD_PREFER_SIBLING domains below a +SD_WAKE_AFFINE domain to change the affinity target. + +Signed-off-by: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091112145610.909723612@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched_fair.c | 33 ++++++++++++++++----------------- + 1 file changed, 16 insertions(+), 17 deletions(-) + +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1398,20 +1398,16 @@ select_idle_sibling(struct task_struct * + * test in select_task_rq_fair) and the prev_cpu is idle then that's + * always a better target than the current cpu. + */ +- if (target == cpu) { +- if (!cpu_rq(prev_cpu)->cfs.nr_running) +- target = prev_cpu; +- } ++ if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running) ++ return prev_cpu; + + /* + * Otherwise, iterate the domain and find an elegible idle cpu. + */ +- if (target == -1 || target == cpu) { +- for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { +- if (!cpu_rq(i)->cfs.nr_running) { +- target = i; +- break; +- } ++ for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { ++ if (!cpu_rq(i)->cfs.nr_running) { ++ target = i; ++ break; + } + } + +@@ -1475,7 +1471,12 @@ select_task_rq_fair(struct rq *rq, struc + want_sd = 0; + } + +- if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) { ++ /* ++ * While iterating the domains looking for a spanning ++ * WAKE_AFFINE domain, adjust the affine target to any idle cpu ++ * in cache sharing domains along the way. ++ */ ++ if (want_affine) { + int target = -1; + + /* +@@ -1488,17 +1489,15 @@ select_task_rq_fair(struct rq *rq, struc + /* + * If there's an idle sibling in this domain, make that + * the wake_affine target instead of the current cpu. +- * +- * XXX: should we possibly do this outside of +- * WAKE_AFFINE, in case the shared cache domain is +- * smaller than the WAKE_AFFINE domain? 
+ */ + if (tmp->flags & SD_PREFER_SIBLING) + target = select_idle_sibling(p, tmp, target); + + if (target >= 0) { +- affine_sd = tmp; +- want_affine = 0; ++ if (tmp->flags & SD_WAKE_AFFINE) { ++ affine_sd = tmp; ++ want_affine = 0; ++ } + cpu = target; + } + } diff --git a/queue-2.6.32/sched-move_task_off_dead_cpu-remove-retry-logic.patch b/queue-2.6.32/sched-move_task_off_dead_cpu-remove-retry-logic.patch new file mode 100644 index 00000000000..bce1a34a198 --- /dev/null +++ b/queue-2.6.32/sched-move_task_off_dead_cpu-remove-retry-logic.patch @@ -0,0 +1,62 @@ +From oleg@redhat.com Fri Sep 17 18:15:27 2010 +From: Oleg Nesterov +Date: Mon, 15 Mar 2010 10:10:14 +0100 +Subject: sched: move_task_off_dead_cpu(): Remove retry logic +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Oleg Nesterov + +commit c1804d547dc098363443667609c272d1e4d15ee8 upstream + +The previous patch preserved the retry logic, but it looks unneeded. + +__migrate_task() can only fail if we raced with migration after we dropped +the lock, but in this case the caller of set_cpus_allowed/etc must initiate +migration itself if ->on_rq == T. + +We already fixed p->cpus_allowed, the changes in active/online masks must +be visible to racer, it should migrate the task to online cpu correctly. + +Signed-off-by: Oleg Nesterov +Signed-off-by: Peter Zijlstra +LKML-Reference: <20100315091014.GA9138@redhat.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -7407,7 +7407,7 @@ static void move_task_off_dead_cpu(int d + struct rq *rq = cpu_rq(dead_cpu); + int needs_cpu, uninitialized_var(dest_cpu); + unsigned long flags; +-again: ++ + local_irq_save(flags); + + spin_lock(&rq->lock); +@@ -7415,14 +7415,13 @@ again: + if (needs_cpu) + dest_cpu = select_fallback_rq(dead_cpu, p); + spin_unlock(&rq->lock); +- +- /* It can have affinity changed while we were choosing. */ ++ /* ++ * It can only fail if we race with set_cpus_allowed(), ++ * in the racer should migrate the task anyway. ++ */ + if (needs_cpu) +- needs_cpu = !__migrate_task(p, dead_cpu, dest_cpu); ++ __migrate_task(p, dead_cpu, dest_cpu); + local_irq_restore(flags); +- +- if (unlikely(needs_cpu)) +- goto again; + } + + /* diff --git a/queue-2.6.32/sched-move_task_off_dead_cpu-take-rq-lock-around-select_fallback_rq.patch b/queue-2.6.32/sched-move_task_off_dead_cpu-take-rq-lock-around-select_fallback_rq.patch new file mode 100644 index 00000000000..37aa5c71eea --- /dev/null +++ b/queue-2.6.32/sched-move_task_off_dead_cpu-take-rq-lock-around-select_fallback_rq.patch @@ -0,0 +1,88 @@ +From oleg@redhat.com Fri Sep 17 18:15:12 2010 +From: Oleg Nesterov +Date: Mon, 15 Mar 2010 10:10:10 +0100 +Subject: sched: move_task_off_dead_cpu(): Take rq->lock around select_fallback_rq() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Oleg Nesterov + +commit 1445c08d06c5594895b4fae952ef8a457e89c390 upstream + +move_task_off_dead_cpu()->select_fallback_rq() reads/updates ->cpus_allowed +lockless. We can race with set_cpus_allowed() running in parallel. + +Change it to take rq->lock around select_fallback_rq(). Note that it is not +trivial to move this spin_lock() into select_fallback_rq(), we must recheck +the task was not migrated after we take the lock and other callers do not +need this lock. 
+ +To avoid the races with other callers of select_fallback_rq() which rely on +TASK_WAKING, we also check p->state != TASK_WAKING and do nothing otherwise. +The owner of TASK_WAKING must update ->cpus_allowed and choose the correct +CPU anyway, and the subsequent __migrate_task() is just meaningless because +p->se.on_rq must be false. + +Alternatively, we could change select_task_rq() to take rq->lock right +after it calls sched_class->select_task_rq(), but this looks a bit ugly. + +Also, change it to not assume irqs are disabled and absorb __migrate_task_irq(). + +Signed-off-by: Oleg Nesterov +Signed-off-by: Peter Zijlstra +LKML-Reference: <20100315091010.GA9131@redhat.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -7399,29 +7399,29 @@ static int migration_thread(void *data) + } + + #ifdef CONFIG_HOTPLUG_CPU +- +-static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) +-{ +- int ret; +- +- local_irq_disable(); +- ret = __migrate_task(p, src_cpu, dest_cpu); +- local_irq_enable(); +- return ret; +-} +- + /* + * Figure out where task on dead CPU should go, use force if necessary. + */ + static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) + { +- int dest_cpu; +- ++ struct rq *rq = cpu_rq(dead_cpu); ++ int needs_cpu, uninitialized_var(dest_cpu); ++ unsigned long flags; + again: +- dest_cpu = select_fallback_rq(dead_cpu, p); ++ local_irq_save(flags); ++ ++ spin_lock(&rq->lock); ++ needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING); ++ if (needs_cpu) ++ dest_cpu = select_fallback_rq(dead_cpu, p); ++ spin_unlock(&rq->lock); + + /* It can have affinity changed while we were choosing. */ +- if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) ++ if (needs_cpu) ++ needs_cpu = !__migrate_task(p, dead_cpu, dest_cpu); ++ local_irq_restore(flags); ++ ++ if (unlikely(needs_cpu)) + goto again; + } + diff --git a/queue-2.6.32/sched-optimize-task_rq_lock.patch b/queue-2.6.32/sched-optimize-task_rq_lock.patch new file mode 100644 index 00000000000..ccd09f685f5 --- /dev/null +++ b/queue-2.6.32/sched-optimize-task_rq_lock.patch @@ -0,0 +1,87 @@ +From a.p.zijlstra@chello.nl Fri Sep 17 18:18:19 2010 +From: Peter Zijlstra +Date: Thu, 25 Mar 2010 21:05:16 +0100 +Subject: sched: Optimize task_rq_lock() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Peter Zijlstra + +commit 65cc8e4859ff29a9ddc989c88557d6059834c2a2 upstream + +Now that we hold the rq->lock over set_task_cpu() again, we can do +away with most of the TASK_WAKING checks and reduce them again to +set_cpus_allowed_ptr(). + +Removes some conditionals from scheduling hot-paths. + +Signed-off-by: Peter Zijlstra +Cc: Oleg Nesterov +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -942,8 +942,8 @@ static inline void finish_lock_switch(st + #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ + + /* +- * Check whether the task is waking, we use this to synchronize against +- * ttwu() so that task_cpu() reports a stable number. ++ * Check whether the task is waking, we use this to synchronize ->cpus_allowed ++ * against ttwu(). 
+ */ + static inline int task_is_waking(struct task_struct *p) + { +@@ -960,11 +960,9 @@ static inline struct rq *__task_rq_lock( + struct rq *rq; + + for (;;) { +- while (task_is_waking(p)) +- cpu_relax(); + rq = task_rq(p); + spin_lock(&rq->lock); +- if (likely(rq == task_rq(p) && !task_is_waking(p))) ++ if (likely(rq == task_rq(p))) + return rq; + spin_unlock(&rq->lock); + } +@@ -981,12 +979,10 @@ static struct rq *task_rq_lock(struct ta + struct rq *rq; + + for (;;) { +- while (task_is_waking(p)) +- cpu_relax(); + local_irq_save(*flags); + rq = task_rq(p); + spin_lock(&rq->lock); +- if (likely(rq == task_rq(p) && !task_is_waking(p))) ++ if (likely(rq == task_rq(p))) + return rq; + spin_unlock_irqrestore(&rq->lock, *flags); + } +@@ -7213,7 +7209,18 @@ int set_cpus_allowed_ptr(struct task_str + struct rq *rq; + int ret = 0; + ++ /* ++ * Serialize against TASK_WAKING so that ttwu() and wunt() can ++ * drop the rq->lock and still rely on ->cpus_allowed. ++ */ ++again: ++ while (task_is_waking(p)) ++ cpu_relax(); + rq = task_rq_lock(p, &flags); ++ if (task_is_waking(p)) { ++ task_rq_unlock(rq, &flags); ++ goto again; ++ } + + if (!cpumask_intersects(new_mask, cpu_active_mask)) { + ret = -EINVAL; diff --git a/queue-2.6.32/sched-pre-compute-cpumask_weight-sched_domain_span-sd.patch b/queue-2.6.32/sched-pre-compute-cpumask_weight-sched_domain_span-sd.patch new file mode 100644 index 00000000000..fb27407d1a1 --- /dev/null +++ b/queue-2.6.32/sched-pre-compute-cpumask_weight-sched_domain_span-sd.patch @@ -0,0 +1,95 @@ +From a.p.zijlstra@chello.nl Fri Sep 17 18:20:23 2010 +From: Peter Zijlstra +Date: Fri, 16 Apr 2010 14:59:29 +0200 +Subject: sched: Pre-compute cpumask_weight(sched_domain_span(sd)) +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <0c6c762bcaa163e06a13da32043ad968d1473188.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 669c55e9f99b90e46eaa0f98a67ec53d46dc969a upstream + +Dave reported that his large SPARC machines spend lots of time in +hweight64(), try and optimize some of those needless cpumask_weight() +invocations (esp. with the large offstack cpumasks these are very +expensive indeed). + +Reported-by: David Miller +Signed-off-by: Peter Zijlstra +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 1 + + kernel/sched.c | 7 +++++-- + kernel/sched_fair.c | 8 +++----- + 3 files changed, 9 insertions(+), 7 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1000,6 +1000,7 @@ struct sched_domain { + char *name; + #endif + ++ unsigned int span_weight; + /* + * Span of all CPUs in this domain. 
+ * +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -3678,7 +3678,7 @@ unsigned long __weak arch_scale_freq_pow + + unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) + { +- unsigned long weight = cpumask_weight(sched_domain_span(sd)); ++ unsigned long weight = sd->span_weight; + unsigned long smt_gain = sd->smt_gain; + + smt_gain /= weight; +@@ -3711,7 +3711,7 @@ unsigned long scale_rt_power(int cpu) + + static void update_cpu_power(struct sched_domain *sd, int cpu) + { +- unsigned long weight = cpumask_weight(sched_domain_span(sd)); ++ unsigned long weight = sd->span_weight; + unsigned long power = SCHED_LOAD_SCALE; + struct sched_group *sdg = sd->groups; + +@@ -8166,6 +8166,9 @@ cpu_attach_domain(struct sched_domain *s + struct rq *rq = cpu_rq(cpu); + struct sched_domain *tmp; + ++ for (tmp = sd; tmp; tmp = tmp->parent) ++ tmp->span_weight = cpumask_weight(sched_domain_span(tmp)); ++ + /* Remove the sched domains which do not contribute to scheduling. */ + for (tmp = sd; tmp; ) { + struct sched_domain *parent = tmp->parent; +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1520,9 +1520,7 @@ select_task_rq_fair(struct rq *rq, struc + * Pick the largest domain to update shares over + */ + tmp = sd; +- if (affine_sd && (!tmp || +- cpumask_weight(sched_domain_span(affine_sd)) > +- cpumask_weight(sched_domain_span(sd)))) ++ if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight)) + tmp = affine_sd; + + if (tmp) { +@@ -1566,10 +1564,10 @@ select_task_rq_fair(struct rq *rq, struc + + /* Now try balancing at a lower domain level of new_cpu */ + cpu = new_cpu; +- weight = cpumask_weight(sched_domain_span(sd)); ++ weight = sd->span_weight; + sd = NULL; + for_each_domain(cpu, tmp) { +- if (weight <= cpumask_weight(sched_domain_span(tmp))) ++ if (weight <= tmp->span_weight) + break; + if (tmp->flags & sd_flag) + sd = tmp; diff --git a/queue-2.6.32/sched-queue-a-deboosted-task-to-the-head-of-the-rt-prio-queue.patch b/queue-2.6.32/sched-queue-a-deboosted-task-to-the-head-of-the-rt-prio-queue.patch new file mode 100644 index 00000000000..61683df70f3 --- /dev/null +++ b/queue-2.6.32/sched-queue-a-deboosted-task-to-the-head-of-the-rt-prio-queue.patch @@ -0,0 +1,56 @@ +From tglx@linutronix.de Fri Sep 17 18:14:25 2010 +From: Thomas Gleixner +Date: Wed, 20 Jan 2010 20:59:06 +0000 +Subject: sched: Queue a deboosted task to the head of the RT prio queue +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <55050ebe52e5ca5834a6f847d19809cba5dc10a0.1283514307.git.efault@gmx.de> + +From: Thomas Gleixner + +commit 60db48cacb9b253d5607a5ff206112a59cd09e34 upstream + +rtmutex_set_prio() is used to implement priority inheritance for +futexes. When a task is deboosted it gets enqueued at the tail of its +RT priority list. This is violating the POSIX scheduling semantics: + +rt priority list X contains two runnable tasks A and B + +task A runs with priority X and holds mutex M +task C preempts A and is blocked on mutex M + -> task A is boosted to priority of task C (Y) +task A unlocks the mutex M and deboosts itself + -> A is dequeued from rt priority list Y + -> A is enqueued to the tail of rt priority list X +task C schedules away +task B runs + +This is wrong as task A did not schedule away and therefor violates +the POSIX scheduling semantics. + +Enqueue the task to the head of the priority list instead. 
+ +Reported-by: Mathias Weber +Reported-by: Carsten Emde +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra +Tested-by: Carsten Emde +Tested-by: Mathias Weber +LKML-Reference: <20100120171629.809074113@linutronix.de> +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -6184,7 +6184,7 @@ void rt_mutex_setprio(struct task_struct + if (running) + p->sched_class->set_curr_task(rq); + if (on_rq) { +- enqueue_task(rq, p, 0, false); ++ enqueue_task(rq, p, 0, oldprio < prio); + + check_class_changed(rq, p, prev_class, oldprio, running); + } diff --git a/queue-2.6.32/sched-remove-unnecessary-rcu-exclusion.patch b/queue-2.6.32/sched-remove-unnecessary-rcu-exclusion.patch new file mode 100644 index 00000000000..4d2f370c542 --- /dev/null +++ b/queue-2.6.32/sched-remove-unnecessary-rcu-exclusion.patch @@ -0,0 +1,62 @@ +From a.p.zijlstra@chello.nl Fri Sep 17 18:19:01 2010 +From: Peter Zijlstra +Date: Tue, 1 Dec 2009 12:21:47 +0100 +Subject: sched: Remove unnecessary RCU exclusion +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <96e351935dd8b98a2e436bf3e254fa3d91f4bd2d.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit fb58bac5c75bfff8bbf7d02071a10a62f32fe28b upstream + +As Nick pointed out, and realized by myself when doing: + sched: Fix balance vs hotplug race +the patch: + sched: for_each_domain() vs RCU + +is wrong, sched_domains are freed after synchronize_sched(), which +means disabling preemption is enough. + +Reported-by: Nick Piggin +Signed-off-by: Peter Zijlstra +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched_fair.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1410,7 +1410,6 @@ select_task_rq_fair(struct rq *rq, struc + new_cpu = prev_cpu; + } + +- rcu_read_lock(); + for_each_domain(cpu, tmp) { + if (!(tmp->flags & SD_LOAD_BALANCE)) + continue; +@@ -1500,10 +1499,8 @@ select_task_rq_fair(struct rq *rq, struc + } + } + +- if (affine_sd && wake_affine(affine_sd, p, sync)) { +- new_cpu = cpu; +- goto out; +- } ++ if (affine_sd && wake_affine(affine_sd, p, sync)) ++ return cpu; + + while (sd) { + int load_idx = sd->forkexec_idx; +@@ -1544,8 +1541,6 @@ select_task_rq_fair(struct rq *rq, struc + /* while loop will break here if sd == NULL */ + } + +-out: +- rcu_read_unlock(); + return new_cpu; + } + #endif /* CONFIG_SMP */ diff --git a/queue-2.6.32/sched-sched_exec-remove-the-select_fallback_rq-logic.patch b/queue-2.6.32/sched-sched_exec-remove-the-select_fallback_rq-logic.patch new file mode 100644 index 00000000000..7db83a47943 --- /dev/null +++ b/queue-2.6.32/sched-sched_exec-remove-the-select_fallback_rq-logic.patch @@ -0,0 +1,96 @@ +From 30da688ef6b76e01969b00608202fff1eed2accc Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Mon, 15 Mar 2010 10:10:19 +0100 +Subject: sched: sched_exec(): Remove the select_fallback_rq() logic + +From: Oleg Nesterov + +commit 30da688ef6b76e01969b00608202fff1eed2accc upstream. + +sched_exec()->select_task_rq() reads/updates ->cpus_allowed lockless. +This can race with other CPUs updating our ->cpus_allowed, and this +looks meaningless to me. + +The task is current and running, it must have online cpus in ->cpus_allowed, +the fallback mode is bogus. 
And, if ->sched_class returns the "wrong" cpu, +this likely means we raced with set_cpus_allowed() which was called +for reason, why should sched_exec() retry and call ->select_task_rq() +again? + +Change the code to call sched_class->select_task_rq() directly and do +nothing if the returned cpu is wrong after re-checking under rq->lock. + +From now task_struct->cpus_allowed is always stable under TASK_WAKING, +select_fallback_rq() is always called under rq-lock or the caller or +the caller owns TASK_WAKING (select_task_rq). + +Signed-off-by: Oleg Nesterov +Signed-off-by: Peter Zijlstra +LKML-Reference: <20100315091019.GA9141@redhat.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched.c | 25 ++++++++----------------- + 1 file changed, 8 insertions(+), 17 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2333,6 +2333,9 @@ void task_oncpu_function_call(struct tas + } + + #ifdef CONFIG_SMP ++/* ++ * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. ++ */ + static int select_fallback_rq(int cpu, struct task_struct *p) + { + int dest_cpu; +@@ -2369,12 +2372,7 @@ static int select_fallback_rq(int cpu, s + } + + /* +- * Gets called from 3 sites (exec, fork, wakeup), since it is called without +- * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done +- * by: +- * +- * exec: is unstable, retry loop +- * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING ++ * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable. + */ + static inline + int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) +@@ -3223,9 +3221,8 @@ void sched_exec(void) + unsigned long flags; + struct rq *rq; + +-again: + this_cpu = get_cpu(); +- dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0); ++ dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); + if (dest_cpu == this_cpu) { + put_cpu(); + return; +@@ -3233,18 +3230,12 @@ again: + + rq = task_rq_lock(p, &flags); + put_cpu(); +- + /* + * select_task_rq() can race against ->cpus_allowed + */ +- if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) +- || unlikely(!cpu_active(dest_cpu))) { +- task_rq_unlock(rq, &flags); +- goto again; +- } +- +- /* force the process onto the specified CPU */ +- if (migrate_task(p, dest_cpu, &req)) { ++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && ++ likely(cpu_active(dest_cpu)) && ++ migrate_task(p, dest_cpu, &req)) { + /* Need to wait for migration thread (might exit: take ref). */ + struct task_struct *mt = rq->migration_thread; + diff --git a/queue-2.6.32/sched-set_cpus_allowed_ptr-don-t-use-rq-migration_thread-after-unlock.patch b/queue-2.6.32/sched-set_cpus_allowed_ptr-don-t-use-rq-migration_thread-after-unlock.patch new file mode 100644 index 00000000000..bc81017c0c4 --- /dev/null +++ b/queue-2.6.32/sched-set_cpus_allowed_ptr-don-t-use-rq-migration_thread-after-unlock.patch @@ -0,0 +1,37 @@ +From oleg@redhat.com Fri Sep 17 18:14:40 2010 +From: Oleg Nesterov +Date: Tue, 30 Mar 2010 18:58:29 +0200 +Subject: sched: set_cpus_allowed_ptr(): Don't use rq->migration_thread after unlock +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Oleg Nesterov + +commit 47a70985e5c093ae03d8ccf633c70a93761d86f2 upstream + +Trivial typo fix. rq->migration_thread can be NULL after +task_rq_unlock(), this is why we have "mt" which should be + used instead. 
+ +Signed-off-by: Oleg Nesterov +Signed-off-by: Peter Zijlstra +LKML-Reference: <20100330165829.GA18284@redhat.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -7273,7 +7273,7 @@ int set_cpus_allowed_ptr(struct task_str + + get_task_struct(mt); + task_rq_unlock(rq, &flags); +- wake_up_process(rq->migration_thread); ++ wake_up_process(mt); + put_task_struct(mt); + wait_for_completion(&req.done); + tlb_migrate_finish(p->mm); diff --git a/queue-2.6.32/series b/queue-2.6.32/series index 58ea8fddf28..7fd031394e1 100644 --- a/queue-2.6.32/series +++ b/queue-2.6.32/series @@ -84,3 +84,28 @@ sched-add-pre-and-post-wakeup-hooks.patch sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch sched-fix-hotplug-hang.patch sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch +sched-fix-incorrect-sanity-check.patch +sched-fix-race-between-ttwu-and-task_rq_lock.patch +sched-extend-enqueue_task-to-allow-head-queueing.patch +sched-implement-head-queueing-for-sched_rt.patch +sched-queue-a-deboosted-task-to-the-head-of-the-rt-prio-queue.patch +sched-set_cpus_allowed_ptr-don-t-use-rq-migration_thread-after-unlock.patch +sched-kill-the-broken-and-deadlockable-cpuset_lock-cpuset_cpus_allowed_locked-code.patch +sched-move_task_off_dead_cpu-take-rq-lock-around-select_fallback_rq.patch +sched-move_task_off_dead_cpu-remove-retry-logic.patch +sched-sched_exec-remove-the-select_fallback_rq-logic.patch +sched-_cpu_down-don-t-play-with-current-cpus_allowed.patch +sched-make-select_fallback_rq-cpuset-friendly.patch +sched-fix-task_waking-vs-fork-deadlock.patch +sched-optimize-task_rq_lock.patch +sched-fix-nr_uninterruptible-count.patch +sched-fix-rq-clock-synchronization-when-migrating-tasks.patch +sched-remove-unnecessary-rcu-exclusion.patch +sched-apply-rcu-protection-to-wake_affine.patch +sched-cleanup-select_task_rq_fair.patch +sched-more-generic-wake_affine-vs-select_idle_sibling.patch +sched-fix-vmark-regression-on-big-machines.patch +sched-fix-select_idle_sibling.patch +sched-pre-compute-cpumask_weight-sched_domain_span-sd.patch +sched-fix-select_idle_sibling-logic-in-select_task_rq_fair.patch +sched-cpuacct-use-bigger-percpu-counter-batch-values-for-stats-counters.patch