From: Greg Kroah-Hartman
Date: Thu, 16 Sep 2010 21:06:53 +0000 (-0700)
Subject: .32 patches
X-Git-Tag: v2.6.27.54~16
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e261a57ba3e1669cf04da27534d71b95869e3b2e;p=thirdparty%2Fkernel%2Fstable-queue.git

.32 patches
---

diff --git a/queue-2.6.32/sched-add-pre-and-post-wakeup-hooks.patch b/queue-2.6.32/sched-add-pre-and-post-wakeup-hooks.patch
new file mode 100644
index 00000000000..f5f334f1e65
--- /dev/null
+++ b/queue-2.6.32/sched-add-pre-and-post-wakeup-hooks.patch
@@ -0,0 +1,95 @@
+From a.p.zijlstra@chello.nl Thu Sep 16 14:05:38 2010
+From: Peter Zijlstra
+Date: Wed, 16 Dec 2009 18:04:40 +0100
+Subject: sched: Add pre and post wakeup hooks
+To: stable
+Cc: Ingo Molnar , Peter Zijlstra , Greg KH
+Message-ID:
+
+From: Peter Zijlstra
+
+commit efbbd05a595343a413964ad85a2ad359b7b7efbd upstream
+
+As will be apparent in the next patch, we need a pre wakeup hook
+for sched_fair task migration, hence rename the post wakeup hook
+and one pre wakeup.
+
+Signed-off-by: Peter Zijlstra
+Cc: Mike Galbraith
+LKML-Reference: <20091216170518.114746117@chello.nl>
+Signed-off-by: Ingo Molnar
+Signed-off-by: Mike Galbraith
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/sched.h |    3 ++-
+ kernel/sched.c        |   12 ++++++++----
+ kernel/sched_rt.c     |    4 ++--
+ 3 files changed, 12 insertions(+), 7 deletions(-)
+
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1093,7 +1093,8 @@ struct sched_class {
+ 			      enum cpu_idle_type idle);
+ 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ 	void (*post_schedule) (struct rq *this_rq);
+-	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
++	void (*task_waking) (struct rq *this_rq, struct task_struct *task);
++	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
+ 
+ 	void (*set_cpus_allowed)(struct task_struct *p,
+ 				 const struct cpumask *newmask);
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2438,6 +2438,10 @@ static int try_to_wake_up(struct task_st
+ 	if (task_contributes_to_load(p))
+ 		rq->nr_uninterruptible--;
+ 	p->state = TASK_WAKING;
++
++	if (p->sched_class->task_waking)
++		p->sched_class->task_waking(rq, p);
++
+ 	__task_rq_unlock(rq);
+ 
+ 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+@@ -2501,8 +2505,8 @@ out_running:
+ 
+ 	p->state = TASK_RUNNING;
+ #ifdef CONFIG_SMP
+-	if (p->sched_class->task_wake_up)
+-		p->sched_class->task_wake_up(rq, p);
++	if (p->sched_class->task_woken)
++		p->sched_class->task_woken(rq, p);
+ 
+ 	if (unlikely(rq->idle_stamp)) {
+ 		u64 delta = rq->clock - rq->idle_stamp;
+@@ -2693,8 +2697,8 @@ void wake_up_new_task(struct task_struct
+ 	trace_sched_wakeup_new(rq, p, 1);
+ 	check_preempt_curr(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+-	if (p->sched_class->task_wake_up)
+-		p->sched_class->task_wake_up(rq, p);
++	if (p->sched_class->task_woken)
++		p->sched_class->task_woken(rq, p);
+ #endif
+ 	task_rq_unlock(rq, &flags);
+ }
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -1485,7 +1485,7 @@ static void post_schedule_rt(struct rq *
+  * If we are not running and we are not going to reschedule soon, we should
+  * try to push tasks away now
+  */
+-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
++static void task_woken_rt(struct rq *rq, struct task_struct *p)
+ {
+ 	if (!task_running(rq, p) &&
+ 	    !test_tsk_need_resched(rq->curr) &&
+@@ -1766,7 +1766,7 @@ static const struct sched_class rt_sched
+ 	.rq_offline		= rq_offline_rt,
+ 	.pre_schedule		= pre_schedule_rt,
+ 	.post_schedule		= post_schedule_rt,
+-	.task_wake_up		= task_wake_up_rt,
++	.task_woken		= task_woken_rt,
+ 	.switched_from		= switched_from_rt,
+ #endif
+ 
diff --git a/queue-2.6.32/sched-clean-up-ttwu-rq-locking.patch b/queue-2.6.32/sched-clean-up-ttwu-rq-locking.patch
new file mode 100644
index 00000000000..0c5a75d2fae
--- /dev/null
+++ b/queue-2.6.32/sched-clean-up-ttwu-rq-locking.patch
@@ -0,0 +1,48 @@
+From a.p.zijlstra@chello.nl Thu Sep 16 14:02:30 2010
+From: Peter Zijlstra
+Date: Fri, 27 Nov 2009 15:44:43 +0100
+Subject: sched: Clean up ttwu() rq locking
+To: stable
+Cc: Ingo Molnar , Peter Zijlstra , Greg KH
+Message-ID: <2c1be9e75310adeb8e4f3f0c1bf356cd3d893ab0.1283514306.git.efault@gmx.de>
+
+From: Peter Zijlstra
+
+commit ab19cb23313733c55e0517607844b86720b35f5f upstream
+
+Since set_task_clock() doesn't rely on rq->clock anymore we can simplyfy
+the mess in ttwu().
+
+Optimize things a bit by not fiddling with the IRQ state there.
+
+Signed-off-by: Peter Zijlstra
+LKML-Reference:
+Signed-off-by: Ingo Molnar
+Signed-off-by: Mike Galbraith
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/sched.c |    8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -2379,16 +2379,14 @@ static int try_to_wake_up(struct task_st
+ 	if (task_contributes_to_load(p))
+ 		rq->nr_uninterruptible--;
+ 	p->state = TASK_WAKING;
+-	task_rq_unlock(rq, &flags);
++	__task_rq_unlock(rq);
+ 
+ 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+ 	if (cpu != orig_cpu)
+ 		set_task_cpu(p, cpu);
+ 
+-	rq = task_rq_lock(p, &flags);
+-
+-	if (rq != orig_rq)
+-		update_rq_clock(rq);
++	rq = __task_rq_lock(p);
++	update_rq_clock(rq);
+ 
+ 	WARN_ON(p->state != TASK_WAKING);
+ 	cpu = task_cpu(p);
diff --git a/queue-2.6.32/sched-consolidate-select_task_rq-callers.patch b/queue-2.6.32/sched-consolidate-select_task_rq-callers.patch
new file mode 100644
index 00000000000..f83ffdec2ed
--- /dev/null
+++ b/queue-2.6.32/sched-consolidate-select_task_rq-callers.patch
@@ -0,0 +1,70 @@
+From a.p.zijlstra@chello.nl Thu Sep 16 14:01:49 2010
+From: Peter Zijlstra
+Date: Wed, 25 Nov 2009 13:31:39 +0100
+Subject: sched: Consolidate select_task_rq() callers
+To: stable
+Cc: Ingo Molnar , Peter Zijlstra , Greg KH
+Message-ID: <24492a0819920ab40aab02b943524de7e090d3d3.1283514306.git.efault@gmx.de>
+
+From: Peter Zijlstra
+
+commit 970b13bacba14a8cef6f642861947df1d175b0b3 upstream
+
+sched: Consolidate select_task_rq() callers
+
+Small cleanup.
+ +Signed-off-by: Peter Zijlstra +LKML-Reference: +[ v2: build fix ] +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2333,6 +2333,14 @@ void task_oncpu_function_call(struct tas + preempt_enable(); + } + ++#ifdef CONFIG_SMP ++static inline ++int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) ++{ ++ return p->sched_class->select_task_rq(p, sd_flags, wake_flags); ++} ++#endif ++ + /*** + * try_to_wake_up - wake up a thread + * @p: the to-be-woken-up thread +@@ -2386,7 +2394,7 @@ static int try_to_wake_up(struct task_st + p->state = TASK_WAKING; + task_rq_unlock(rq, &flags); + +- cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); ++ cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); + if (cpu != orig_cpu) + set_task_cpu(p, cpu); + +@@ -2601,7 +2609,7 @@ void sched_fork(struct task_struct *p, i + p->sched_class = &fair_sched_class; + + #ifdef CONFIG_SMP +- cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); ++ cpu = select_task_rq(p, SD_BALANCE_FORK, 0); + #endif + set_task_cpu(p, cpu); + +@@ -3170,7 +3178,7 @@ out: + void sched_exec(void) + { + int new_cpu, this_cpu = get_cpu(); +- new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); ++ new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); + put_cpu(); + if (new_cpu != this_cpu) + sched_migrate_task(current, new_cpu); diff --git a/queue-2.6.32/sched-ensure-set_task_cpu-is-never-called-on-blocked-tasks.patch b/queue-2.6.32/sched-ensure-set_task_cpu-is-never-called-on-blocked-tasks.patch new file mode 100644 index 00000000000..2d298829c1c --- /dev/null +++ b/queue-2.6.32/sched-ensure-set_task_cpu-is-never-called-on-blocked-tasks.patch @@ -0,0 +1,191 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:04:28 2010 +From: Peter Zijlstra +Date: Wed, 16 Dec 2009 18:04:36 +0100 +Subject: sched: Ensure set_task_cpu() is never called on blocked tasks +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <7cf1b7473e78ce3c210ddde81f1e72e778aecffb.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit e2912009fb7b715728311b0d8fe327a1432b3f79 upstream + +In order to clean up the set_task_cpu() rq dependencies we need +to ensure it is never called on blocked tasks because such usage +does not pair with consistent rq->lock usage. + +This puts the migration burden on ttwu(). + +Furthermore we need to close a race against changing +->cpus_allowed, since select_task_rq() runs with only preemption +disabled. + +For sched_fork() this is safe because the child isn't in the +tasklist yet, for wakeup we fix this by synchronizing +set_cpus_allowed_ptr() against TASK_WAKING, which leaves +sched_exec to be a problem + +This also closes a hole in (6ad4c1888 sched: Fix balance vs +hotplug race) where ->select_task_rq() doesn't validate the +result against the sched_domain/root_domain. 
+ +Signed-off-by: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091216170517.807938893@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 83 +++++++++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 66 insertions(+), 17 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2017,21 +2017,15 @@ static inline void check_class_changed(s + */ + void kthread_bind(struct task_struct *p, unsigned int cpu) + { +- struct rq *rq = cpu_rq(cpu); +- unsigned long flags; +- + /* Must have done schedule() in kthread() before we set_task_cpu */ + if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { + WARN_ON(1); + return; + } + +- spin_lock_irqsave(&rq->lock, flags); +- set_task_cpu(p, cpu); + p->cpus_allowed = cpumask_of_cpu(cpu); + p->rt.nr_cpus_allowed = 1; + p->flags |= PF_THREAD_BOUND; +- spin_unlock_irqrestore(&rq->lock, flags); + } + EXPORT_SYMBOL(kthread_bind); + +@@ -2072,6 +2066,14 @@ void set_task_cpu(struct task_struct *p, + struct cfs_rq *old_cfsrq = task_cfs_rq(p), + *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); + ++#ifdef CONFIG_SCHED_DEBUG ++ /* ++ * We should never call set_task_cpu() on a blocked task, ++ * ttwu() will sort out the placement. ++ */ ++ WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING); ++#endif ++ + trace_sched_migrate_task(p, new_cpu); + + if (old_cpu != new_cpu) { +@@ -2105,12 +2107,10 @@ migrate_task(struct task_struct *p, int + + /* + * If the task is not on a runqueue (and not running), then +- * it is sufficient to simply update the task's cpu field. ++ * the next wake-up will properly place the task. + */ +- if (!p->se.on_rq && !task_running(rq, p)) { +- set_task_cpu(p, dest_cpu); ++ if (!p->se.on_rq && !task_running(rq, p)) + return 0; +- } + + init_completion(&req->done); + req->task = p; +@@ -2316,10 +2316,42 @@ void task_oncpu_function_call(struct tas + } + + #ifdef CONFIG_SMP ++/* ++ * Called from: ++ * ++ * - fork, @p is stable because it isn't on the tasklist yet ++ * ++ * - exec, @p is unstable XXX ++ * ++ * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so ++ * we should be good. ++ */ + static inline + int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) + { +- return p->sched_class->select_task_rq(p, sd_flags, wake_flags); ++ int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); ++ ++ /* ++ * In order not to call set_task_cpu() on a blocking task we need ++ * to rely on ttwu() to place the task on a valid ->cpus_allowed ++ * cpu. ++ * ++ * Since this is common to all placement strategies, this lives here. ++ * ++ * [ this allows ->select_task() to simply return task_cpu(p) and ++ * not worry about this generic constraint ] ++ */ ++ if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || ++ !cpu_active(cpu))) { ++ ++ cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); ++ /* ++ * XXX: race against hot-plug modifying cpu_active_mask ++ */ ++ BUG_ON(cpu >= nr_cpu_ids); ++ } ++ ++ return cpu; + } + #endif + +@@ -7128,7 +7160,23 @@ int set_cpus_allowed_ptr(struct task_str + struct rq *rq; + int ret = 0; + ++ /* ++ * Since we rely on wake-ups to migrate sleeping tasks, don't change ++ * the ->cpus_allowed mask from under waking tasks, which would be ++ * possible when we change rq->lock in ttwu(), so synchronize against ++ * TASK_WAKING to avoid that. 
++ */ ++again: ++ while (p->state == TASK_WAKING) ++ cpu_relax(); ++ + rq = task_rq_lock(p, &flags); ++ ++ if (p->state == TASK_WAKING) { ++ task_rq_unlock(rq, &flags); ++ goto again; ++ } ++ + if (!cpumask_intersects(new_mask, cpu_active_mask)) { + ret = -EINVAL; + goto out; +@@ -7184,7 +7232,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) + { + struct rq *rq_dest, *rq_src; +- int ret = 0, on_rq; ++ int ret = 0; + + if (unlikely(!cpu_active(dest_cpu))) + return ret; +@@ -7200,12 +7248,13 @@ static int __migrate_task(struct task_st + if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + goto fail; + +- on_rq = p->se.on_rq; +- if (on_rq) ++ /* ++ * If we're not on a rq, the next wake-up will ensure we're ++ * placed properly. ++ */ ++ if (p->se.on_rq) { + deactivate_task(rq_src, p, 0); +- +- set_task_cpu(p, dest_cpu); +- if (on_rq) { ++ set_task_cpu(p, dest_cpu); + activate_task(rq_dest, p, 0); + check_preempt_curr(rq_dest, p, 0); + } diff --git a/queue-2.6.32/sched-fix-broken-assertion.patch b/queue-2.6.32/sched-fix-broken-assertion.patch new file mode 100644 index 00000000000..d00e6e207a6 --- /dev/null +++ b/queue-2.6.32/sched-fix-broken-assertion.patch @@ -0,0 +1,39 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:05:00 2010 +From: Peter Zijlstra +Date: Thu, 17 Dec 2009 13:16:31 +0100 +Subject: sched: Fix broken assertion +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <442ca4a574ee45640c3d99809b6171fb9d3f6646.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 077614ee1e93245a3b9a4e1213659405dbeb0ba6 upstream + +There's a preemption race in the set_task_cpu() debug check in +that when we get preempted after setting task->state we'd still +be on the rq proper, but fail the test. + +Check for preempted tasks, since those are always on the RQ. + +Signed-off-by: Peter Zijlstra +LKML-Reference: <20091217121830.137155561@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2071,7 +2071,8 @@ void set_task_cpu(struct task_struct *p, + * We should never call set_task_cpu() on a blocked task, + * ttwu() will sort out the placement. 
+ */ +- WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING); ++ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && ++ !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); + #endif + + trace_sched_migrate_task(p, new_cpu); diff --git a/queue-2.6.32/sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch b/queue-2.6.32/sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch new file mode 100644 index 00000000000..8e5cb4cd7cb --- /dev/null +++ b/queue-2.6.32/sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch @@ -0,0 +1,184 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:06:15 2010 +From: Peter Zijlstra +Date: Thu, 21 Jan 2010 21:04:57 +0100 +Subject: sched: Fix fork vs hotplug vs cpuset namespaces +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Peter Zijlstra + +commit fabf318e5e4bda0aca2b0d617b191884fda62703 upstream + +There are a number of issues: + +1) TASK_WAKING vs cgroup_clone (cpusets) + +copy_process(): + + sched_fork() + child->state = TASK_WAKING; /* waiting for wake_up_new_task() */ + if (current->nsproxy != p->nsproxy) + ns_cgroup_clone() + cgroup_clone() + mutex_lock(inode->i_mutex) + mutex_lock(cgroup_mutex) + cgroup_attach_task() + ss->can_attach() + ss->attach() [ -> cpuset_attach() ] + cpuset_attach_task() + set_cpus_allowed_ptr(); + while (child->state == TASK_WAKING) + cpu_relax(); +will deadlock the system. + +2) cgroup_clone (cpusets) vs copy_process + +So even if the above would work we still have: + +copy_process(): + + if (current->nsproxy != p->nsproxy) + ns_cgroup_clone() + cgroup_clone() + mutex_lock(inode->i_mutex) + mutex_lock(cgroup_mutex) + cgroup_attach_task() + ss->can_attach() + ss->attach() [ -> cpuset_attach() ] + cpuset_attach_task() + set_cpus_allowed_ptr(); + ... + + p->cpus_allowed = current->cpus_allowed + +over-writing the modified cpus_allowed. + +3) fork() vs hotplug + + if we unplug the child's cpu after the sanity check when the child + gets attached to the task_list but before wake_up_new_task() shit + will meet with fan. + +Solve all these issues by moving fork cpu selection into +wake_up_new_task(). + +Reported-by: Serge E. Hallyn +Tested-by: Serge E. Hallyn +Signed-off-by: Peter Zijlstra +LKML-Reference: <1264106190.4283.1314.camel@laptop> +Signed-off-by: Thomas Gleixner +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/fork.c | 15 --------------- + kernel/sched.c | 39 +++++++++++++++++++++++++++------------ + 2 files changed, 27 insertions(+), 27 deletions(-) + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1233,21 +1233,6 @@ static struct task_struct *copy_process( + /* Need tasklist lock for parent etc handling! */ + write_lock_irq(&tasklist_lock); + +- /* +- * The task hasn't been attached yet, so its cpus_allowed mask will +- * not be changed, nor will its assigned CPU. +- * +- * The cpus_allowed mask of the parent may have changed after it was +- * copied first time - so re-copy it here, then check the child's CPU +- * to ensure it is on a valid CPU (and if not, just force it back to +- * parent's CPU). This avoids alot of nasty races. 
+- */ +- p->cpus_allowed = current->cpus_allowed; +- p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; +- if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || +- !cpu_online(task_cpu(p)))) +- set_task_cpu(p, smp_processor_id()); +- + /* CLONE_PARENT re-uses the old parent */ + if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { + p->real_parent = current->real_parent; +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2351,14 +2351,12 @@ static int select_fallback_rq(int cpu, s + } + + /* +- * Called from: ++ * Gets called from 3 sites (exec, fork, wakeup), since it is called without ++ * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done ++ * by: + * +- * - fork, @p is stable because it isn't on the tasklist yet +- * +- * - exec, @p is unstable, retry loop +- * +- * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so +- * we should be good. ++ * exec: is unstable, retry loop ++ * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING + */ + static inline + int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) +@@ -2652,9 +2650,6 @@ void sched_fork(struct task_struct *p, i + if (p->sched_class->task_fork) + p->sched_class->task_fork(p); + +-#ifdef CONFIG_SMP +- cpu = select_task_rq(p, SD_BALANCE_FORK, 0); +-#endif + set_task_cpu(p, cpu); + + #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +@@ -2684,6 +2679,21 @@ void wake_up_new_task(struct task_struct + { + unsigned long flags; + struct rq *rq; ++ int cpu = get_cpu(); ++ ++#ifdef CONFIG_SMP ++ /* ++ * Fork balancing, do it here and not earlier because: ++ * - cpus_allowed can change in the fork path ++ * - any previously selected cpu might disappear through hotplug ++ * ++ * We still have TASK_WAKING but PF_STARTING is gone now, meaning ++ * ->cpus_allowed is stable, we have preemption disabled, meaning ++ * cpu_online_mask is stable. ++ */ ++ cpu = select_task_rq(p, SD_BALANCE_FORK, 0); ++ set_task_cpu(p, cpu); ++#endif + + rq = task_rq_lock(p, &flags); + BUG_ON(p->state != TASK_WAKING); +@@ -2697,6 +2707,7 @@ void wake_up_new_task(struct task_struct + p->sched_class->task_woken(rq, p); + #endif + task_rq_unlock(rq, &flags); ++ put_cpu(); + } + + #ifdef CONFIG_PREEMPT_NOTIFIERS +@@ -7198,14 +7209,18 @@ int set_cpus_allowed_ptr(struct task_str + * the ->cpus_allowed mask from under waking tasks, which would be + * possible when we change rq->lock in ttwu(), so synchronize against + * TASK_WAKING to avoid that. ++ * ++ * Make an exception for freshly cloned tasks, since cpuset namespaces ++ * might move the task about, we have to validate the target in ++ * wake_up_new_task() anyway since the cpu might have gone away. 
+ */ + again: +- while (p->state == TASK_WAKING) ++ while (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) + cpu_relax(); + + rq = task_rq_lock(p, &flags); + +- if (p->state == TASK_WAKING) { ++ if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) { + task_rq_unlock(rq, &flags); + goto again; + } diff --git a/queue-2.6.32/sched-fix-hotplug-hang.patch b/queue-2.6.32/sched-fix-hotplug-hang.patch new file mode 100644 index 00000000000..6bfa50e5e77 --- /dev/null +++ b/queue-2.6.32/sched-fix-hotplug-hang.patch @@ -0,0 +1,41 @@ +From peterz@infradead.org Thu Sep 16 14:06:03 2010 +From: Peter Zijlstra +Date: Sun, 20 Dec 2009 17:36:27 +0100 +Subject: sched: Fix hotplug hang +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Peter Zijlstra + +commit 70f1120527797adb31c68bdc6f1b45e182c342c7 upstream + +The hot-unplug kstopmachine usage does a wakeup after +deactivating the cpu, hence we cannot use cpu_active() +here but must rely on the good olde online. + +Reported-by: Sachin Sant +Reported-by: Jens Axboe +Signed-off-by: Peter Zijlstra +Tested-by: Jens Axboe +Cc: Heiko Carstens +Cc: Benjamin Herrenschmidt +LKML-Reference: <1261326987.4314.24.camel@laptop> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2376,7 +2376,7 @@ int select_task_rq(struct task_struct *p + * not worry about this generic constraint ] + */ + if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || +- !cpu_active(cpu))) ++ !cpu_online(cpu))) + cpu = select_fallback_rq(task_cpu(p), p); + + return cpu; diff --git a/queue-2.6.32/sched-fix-sched_exec-balancing.patch b/queue-2.6.32/sched-fix-sched_exec-balancing.patch new file mode 100644 index 00000000000..97f53daec87 --- /dev/null +++ b/queue-2.6.32/sched-fix-sched_exec-balancing.patch @@ -0,0 +1,109 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:05:16 2010 +From: Peter Zijlstra +Date: Wed, 16 Dec 2009 18:04:37 +0100 +Subject: sched: Fix sched_exec() balancing +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Peter Zijlstra + +commit 3802290628348674985d14914f9bfee7b9084548 upstream + +sched: Fix sched_exec() balancing + +Since we access ->cpus_allowed without holding rq->lock we need +a retry loop to validate the result, this comes for near free +when we merge sched_migrate_task() into sched_exec() since that +already does the needed check. + +Signed-off-by: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091216170517.884743662@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 45 +++++++++++++++++++++++---------------------- + 1 file changed, 23 insertions(+), 22 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2322,7 +2322,7 @@ void task_oncpu_function_call(struct tas + * + * - fork, @p is stable because it isn't on the tasklist yet + * +- * - exec, @p is unstable XXX ++ * - exec, @p is unstable, retry loop + * + * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so + * we should be good. +@@ -3133,21 +3133,36 @@ static void double_rq_unlock(struct rq * + } + + /* +- * If dest_cpu is allowed for this process, migrate the task to it. +- * This is accomplished by forcing the cpu_allowed mask to only +- * allow dest_cpu, which will force the cpu onto dest_cpu. Then +- * the cpu_allowed mask is restored. 
++ * sched_exec - execve() is a valuable balancing opportunity, because at ++ * this point the task has the smallest effective memory and cache footprint. + */ +-static void sched_migrate_task(struct task_struct *p, int dest_cpu) ++void sched_exec(void) + { ++ struct task_struct *p = current; + struct migration_req req; ++ int dest_cpu, this_cpu; + unsigned long flags; + struct rq *rq; + ++again: ++ this_cpu = get_cpu(); ++ dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0); ++ if (dest_cpu == this_cpu) { ++ put_cpu(); ++ return; ++ } ++ + rq = task_rq_lock(p, &flags); ++ put_cpu(); ++ ++ /* ++ * select_task_rq() can race against ->cpus_allowed ++ */ + if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) +- || unlikely(!cpu_active(dest_cpu))) +- goto out; ++ || unlikely(!cpu_active(dest_cpu))) { ++ task_rq_unlock(rq, &flags); ++ goto again; ++ } + + /* force the process onto the specified CPU */ + if (migrate_task(p, dest_cpu, &req)) { +@@ -3162,24 +3177,10 @@ static void sched_migrate_task(struct ta + + return; + } +-out: + task_rq_unlock(rq, &flags); + } + + /* +- * sched_exec - execve() is a valuable balancing opportunity, because at +- * this point the task has the smallest effective memory and cache footprint. +- */ +-void sched_exec(void) +-{ +- int new_cpu, this_cpu = get_cpu(); +- new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); +- put_cpu(); +- if (new_cpu != this_cpu) +- sched_migrate_task(current, new_cpu); +-} +- +-/* + * pull_task - move a task from a remote runqueue to the local runqueue. + * Both runqueues must be locked. + */ diff --git a/queue-2.6.32/sched-fix-select_task_rq-vs-hotplug-issues.patch b/queue-2.6.32/sched-fix-select_task_rq-vs-hotplug-issues.patch new file mode 100644 index 00000000000..e6c57e13b29 --- /dev/null +++ b/queue-2.6.32/sched-fix-select_task_rq-vs-hotplug-issues.patch @@ -0,0 +1,132 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:05:27 2010 +From: Peter Zijlstra +Date: Wed, 16 Dec 2009 18:04:38 +0100 +Subject: sched: Fix select_task_rq() vs hotplug issues +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Peter Zijlstra + +commit 5da9a0fb673a0ea0a093862f95f6b89b3390c31e upstream + +Since select_task_rq() is now responsible for guaranteeing +->cpus_allowed and cpu_active_mask, we need to verify this. + +select_task_rq_rt() can blindly return +smp_processor_id()/task_cpu() without checking the valid masks, +select_task_rq_fair() can do the same in the rare case that all +SD_flags are disabled. + +Signed-off-by: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091216170517.961475466@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 76 ++++++++++++++++++++++++++++++--------------------------- + 1 file changed, 40 insertions(+), 36 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2317,6 +2317,43 @@ void task_oncpu_function_call(struct tas + } + + #ifdef CONFIG_SMP ++static int select_fallback_rq(int cpu, struct task_struct *p) ++{ ++ int dest_cpu; ++ const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); ++ ++ /* Look for allowed, online CPU in same node. */ ++ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) ++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) ++ return dest_cpu; ++ ++ /* Any allowed, online CPU? */ ++ dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); ++ if (dest_cpu < nr_cpu_ids) ++ return dest_cpu; ++ ++ /* No more Mr. Nice Guy. 
*/ ++ if (dest_cpu >= nr_cpu_ids) { ++ rcu_read_lock(); ++ cpuset_cpus_allowed_locked(p, &p->cpus_allowed); ++ rcu_read_unlock(); ++ dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); ++ ++ /* ++ * Don't tell them about moving exiting tasks or ++ * kernel threads (both mm NULL), since they never ++ * leave kernel. ++ */ ++ if (p->mm && printk_ratelimit()) { ++ printk(KERN_INFO "process %d (%s) no " ++ "longer affine to cpu%d\n", ++ task_pid_nr(p), p->comm, cpu); ++ } ++ } ++ ++ return dest_cpu; ++} ++ + /* + * Called from: + * +@@ -2343,14 +2380,8 @@ int select_task_rq(struct task_struct *p + * not worry about this generic constraint ] + */ + if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || +- !cpu_active(cpu))) { +- +- cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); +- /* +- * XXX: race against hot-plug modifying cpu_active_mask +- */ +- BUG_ON(cpu >= nr_cpu_ids); +- } ++ !cpu_active(cpu))) ++ cpu = select_fallback_rq(task_cpu(p), p); + + return cpu; + } +@@ -7352,37 +7383,10 @@ static int __migrate_task_irq(struct tas + static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) + { + int dest_cpu; +- const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu)); + + again: +- /* Look for allowed, online CPU in same node. */ +- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) +- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) +- goto move; +- +- /* Any allowed, online CPU? */ +- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); +- if (dest_cpu < nr_cpu_ids) +- goto move; +- +- /* No more Mr. Nice Guy. */ +- if (dest_cpu >= nr_cpu_ids) { +- cpuset_cpus_allowed_locked(p, &p->cpus_allowed); +- dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); +- +- /* +- * Don't tell them about moving exiting tasks or +- * kernel threads (both mm NULL), since they never +- * leave kernel. +- */ +- if (p->mm && printk_ratelimit()) { +- printk(KERN_INFO "process %d (%s) no " +- "longer affine to cpu%d\n", +- task_pid_nr(p), p->comm, dead_cpu); +- } +- } ++ dest_cpu = select_fallback_rq(dead_cpu, p); + +-move: + /* It can have affinity changed while we were choosing. */ + if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) + goto again; diff --git a/queue-2.6.32/sched-fix-set_cpu_active-in-cpu_down.patch b/queue-2.6.32/sched-fix-set_cpu_active-in-cpu_down.patch new file mode 100644 index 00000000000..8a9bc7e4abb --- /dev/null +++ b/queue-2.6.32/sched-fix-set_cpu_active-in-cpu_down.patch @@ -0,0 +1,86 @@ +From dfeng@redhat.com Thu Sep 16 14:04:03 2010 +From: Xiaotian Feng +Date: Wed, 16 Dec 2009 18:04:32 +0100 +Subject: sched: Fix set_cpu_active() in cpu_down() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Xiaotian Feng + +commit 9ee349ad6d326df3633d43f54202427295999c47 upstream + +Sachin found cpu hotplug test failures on powerpc, which made +the kernel hang on his POWER box. + +The problem is that we fail to re-activate a cpu when a +hot-unplug fails. Fix this by moving the de-activation into +_cpu_down after doing the initial checks. + +Remove the synchronize_sched() calls and rely on those implied +by rebuilding the sched domains using the new mask. 
+ +Reported-by: Sachin Sant +Signed-off-by: Xiaotian Feng +Tested-by: Sachin Sant +Signed-off-by: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091216170517.500272612@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cpu.c | 24 +++--------------------- + 1 file changed, 3 insertions(+), 21 deletions(-) + +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -209,6 +209,7 @@ static int __ref _cpu_down(unsigned int + return -ENOMEM; + + cpu_hotplug_begin(); ++ set_cpu_active(cpu, false); + err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, + hcpu, -1, &nr_calls); + if (err == NOTIFY_BAD) { +@@ -280,18 +281,6 @@ int __ref cpu_down(unsigned int cpu) + goto out; + } + +- set_cpu_active(cpu, false); +- +- /* +- * Make sure the all cpus did the reschedule and are not +- * using stale version of the cpu_active_mask. +- * This is not strictly necessary becuase stop_machine() +- * that we run down the line already provides the required +- * synchronization. But it's really a side effect and we do not +- * want to depend on the innards of the stop_machine here. +- */ +- synchronize_sched(); +- + err = _cpu_down(cpu, 0); + + out: +@@ -382,19 +371,12 @@ int disable_nonboot_cpus(void) + return error; + cpu_maps_update_begin(); + first_cpu = cpumask_first(cpu_online_mask); +- /* We take down all of the non-boot CPUs in one shot to avoid races ++ /* ++ * We take down all of the non-boot CPUs in one shot to avoid races + * with the userspace trying to use the CPU hotplug at the same time + */ + cpumask_clear(frozen_cpus); + +- for_each_online_cpu(cpu) { +- if (cpu == first_cpu) +- continue; +- set_cpu_active(cpu, false); +- } +- +- synchronize_sched(); +- + printk("Disabling non-boot CPUs ...\n"); + for_each_online_cpu(cpu) { + if (cpu == first_cpu) diff --git a/queue-2.6.32/sched-make-wakeup-side-and-atomic-variants-of-completion-api-irq-safe.patch b/queue-2.6.32/sched-make-wakeup-side-and-atomic-variants-of-completion-api-irq-safe.patch new file mode 100644 index 00000000000..ac058e2885c --- /dev/null +++ b/queue-2.6.32/sched-make-wakeup-side-and-atomic-variants-of-completion-api-irq-safe.patch @@ -0,0 +1,72 @@ +From rjw@sisk.pl Thu Sep 16 14:03:08 2010 +From: Rafael J.Wysocki +Date: Sun, 13 Dec 2009 00:07:30 +0100 +Subject: sched: Make wakeup side and atomic variants of completion API irq safe +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <09c3ea5b3483bec5c4181b9dfd61b5da6b15969b.1283514306.git.efault@gmx.de> + +From: Rafael J.Wysocki + +commit 7539a3b3d1f892dd97eaf094134d7de55c13befe upstream + +Alan Stern noticed that all the wakeup side (and atomic) variants of the +completion APIs should be irq safe, but the newly introduced +completion_done() and try_wait_for_completion() aren't. The use of the +irq unsafe variants in IRQ contexts can cause crashes/hangs. + +Fix the problem by making them use spin_lock_irqsave() and +spin_lock_irqrestore(). + +Reported-by: Alan Stern +Signed-off-by: Rafael J. 
Wysocki +Cc: Linus Torvalds +Cc: Zhang Rui +Cc: pm list +Cc: Peter Zijlstra +Cc: David Chinner +Cc: Lachlan McIlroy +LKML-Reference: <200912130007.30541.rjw@sisk.pl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -5947,14 +5947,15 @@ EXPORT_SYMBOL(wait_for_completion_killab + */ + bool try_wait_for_completion(struct completion *x) + { ++ unsigned long flags; + int ret = 1; + +- spin_lock_irq(&x->wait.lock); ++ spin_lock_irqsave(&x->wait.lock, flags); + if (!x->done) + ret = 0; + else + x->done--; +- spin_unlock_irq(&x->wait.lock); ++ spin_unlock_irqrestore(&x->wait.lock, flags); + return ret; + } + EXPORT_SYMBOL(try_wait_for_completion); +@@ -5969,12 +5970,13 @@ EXPORT_SYMBOL(try_wait_for_completion); + */ + bool completion_done(struct completion *x) + { ++ unsigned long flags; + int ret = 1; + +- spin_lock_irq(&x->wait.lock); ++ spin_lock_irqsave(&x->wait.lock, flags); + if (!x->done) + ret = 0; +- spin_unlock_irq(&x->wait.lock); ++ spin_unlock_irqrestore(&x->wait.lock, flags); + return ret; + } + EXPORT_SYMBOL(completion_done); diff --git a/queue-2.6.32/sched-make-warning-less-noisy.patch b/queue-2.6.32/sched-make-warning-less-noisy.patch new file mode 100644 index 00000000000..d1afe311027 --- /dev/null +++ b/queue-2.6.32/sched-make-warning-less-noisy.patch @@ -0,0 +1,33 @@ +From mingo@elte.hu Thu Sep 16 14:04:48 2010 +From: Ingo Molnar +Date: Thu, 17 Dec 2009 06:05:49 +0100 +Subject: sched: Make warning less noisy +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Ingo Molnar + +commit 416eb39556a03d1c7e52b0791e9052ccd71db241 upstream + +Cc: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091216170517.807938893@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2071,7 +2071,7 @@ void set_task_cpu(struct task_struct *p, + * We should never call set_task_cpu() on a blocked task, + * ttwu() will sort out the placement. + */ +- WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING); ++ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING); + #endif + + trace_sched_migrate_task(p, new_cpu); diff --git a/queue-2.6.32/sched-protect-sched_rr_get_param-access-to-task-sched_class.patch b/queue-2.6.32/sched-protect-sched_rr_get_param-access-to-task-sched_class.patch new file mode 100644 index 00000000000..b4ae27f3602 --- /dev/null +++ b/queue-2.6.32/sched-protect-sched_rr_get_param-access-to-task-sched_class.patch @@ -0,0 +1,116 @@ +From tglx@linutronix.de Thu Sep 16 14:01:34 2010 +From: Thomas Gleixner +Date: Wed, 9 Dec 2009 09:32:03 +0100 +Subject: sched: Protect sched_rr_get_param() access to task->sched_class +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <5b6c8ee25a7849df728e34620f6e39a71dd0ba56.1283514306.git.efault@gmx.de> + +From: Thomas Gleixner + +commit dba091b9e3522b9d32fc9975e48d3b69633b45f0 upstream + +sched_rr_get_param calls +task->sched_class->get_rr_interval(task) without protection +against a concurrent sched_setscheduler() call which modifies +task->sched_class. + +Serialize the access with task_rq_lock(task) and hand the rq +pointer into get_rr_interval() as it's needed at least in the +sched_fair implementation. 
+ +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 3 ++- + kernel/sched.c | 6 +++++- + kernel/sched_fair.c | 6 +----- + kernel/sched_idletask.c | 2 +- + kernel/sched_rt.c | 2 +- + 5 files changed, 10 insertions(+), 9 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1114,7 +1114,8 @@ struct sched_class { + void (*prio_changed) (struct rq *this_rq, struct task_struct *task, + int oldprio, int running); + +- unsigned int (*get_rr_interval) (struct task_struct *task); ++ unsigned int (*get_rr_interval) (struct rq *rq, ++ struct task_struct *task); + + #ifdef CONFIG_FAIR_GROUP_SCHED + void (*moved_group) (struct task_struct *p); +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -6946,6 +6946,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p + { + struct task_struct *p; + unsigned int time_slice; ++ unsigned long flags; ++ struct rq *rq; + int retval; + struct timespec t; + +@@ -6962,7 +6964,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p + if (retval) + goto out_unlock; + +- time_slice = p->sched_class->get_rr_interval(p); ++ rq = task_rq_lock(p, &flags); ++ time_slice = p->sched_class->get_rr_interval(rq, p); ++ task_rq_unlock(rq, &flags); + + read_unlock(&tasklist_lock); + jiffies_to_timespec(time_slice, &t); +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -2003,21 +2003,17 @@ static void moved_group_fair(struct task + } + #endif + +-unsigned int get_rr_interval_fair(struct task_struct *task) ++unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task) + { + struct sched_entity *se = &task->se; +- unsigned long flags; +- struct rq *rq; + unsigned int rr_interval = 0; + + /* + * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise + * idle runqueue: + */ +- rq = task_rq_lock(task, &flags); + if (rq->cfs.load.weight) + rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); +- task_rq_unlock(rq, &flags); + + return rr_interval; + } +--- a/kernel/sched_idletask.c ++++ b/kernel/sched_idletask.c +@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq + check_preempt_curr(rq, p, 0); + } + +-unsigned int get_rr_interval_idle(struct task_struct *task) ++unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) + { + return 0; + } +--- a/kernel/sched_rt.c ++++ b/kernel/sched_rt.c +@@ -1734,7 +1734,7 @@ static void set_curr_task_rt(struct rq * + dequeue_pushable_task(rq, p); + } + +-unsigned int get_rr_interval_rt(struct task_struct *task) ++unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) + { + /* + * Time slice is 0 for SCHED_FIFO tasks diff --git a/queue-2.6.32/sched-protect-task-cpus_allowed-access-in-sched_getaffinity.patch b/queue-2.6.32/sched-protect-task-cpus_allowed-access-in-sched_getaffinity.patch new file mode 100644 index 00000000000..03bcdf3e6d0 --- /dev/null +++ b/queue-2.6.32/sched-protect-task-cpus_allowed-access-in-sched_getaffinity.patch @@ -0,0 +1,50 @@ +From tglx@linutronix.de Thu Sep 16 14:00:26 2010 +Message-Id: <8a8f42c974ba851cdca56d2243ed403812e6e994.1283514306.git.efault@gmx.de> +From: Thomas Gleixner +Date: Tue, 8 Dec 2009 20:24:16 +0000 +Subject: sched: Protect task->cpus_allowed access in sched_getaffinity() +To: stable +Cc: Ingo Molnar , + Peter Zijlstra , Greg KH + + +From: Thomas Gleixner + +commit 3160568371da441b7f2fb57f2f1225404207e8f2 upstream + +sched_getaffinity() is not protected against a concurrent +modification 
of the tasks affinity. + +Serialize the access with task_rq_lock(task). + +Signed-off-by: Thomas Gleixner +Acked-by: Peter Zijlstra +LKML-Reference: <20091208202026.769251187@linutronix.de> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -6686,6 +6686,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t + long sched_getaffinity(pid_t pid, struct cpumask *mask) + { + struct task_struct *p; ++ unsigned long flags; ++ struct rq *rq; + int retval; + + get_online_cpus(); +@@ -6700,7 +6702,9 @@ long sched_getaffinity(pid_t pid, struct + if (retval) + goto out_unlock; + ++ rq = task_rq_lock(p, &flags); + cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); ++ task_rq_unlock(rq, &flags); + + out_unlock: + read_unlock(&tasklist_lock); diff --git a/queue-2.6.32/sched-remove-forced2_migrations-stats.patch b/queue-2.6.32/sched-remove-forced2_migrations-stats.patch new file mode 100644 index 00000000000..7c40b17523b --- /dev/null +++ b/queue-2.6.32/sched-remove-forced2_migrations-stats.patch @@ -0,0 +1,89 @@ +From mingo@elte.hu Thu Sep 16 14:02:52 2010 +From: Ingo Molnar +Date: Thu, 10 Dec 2009 20:32:39 +0100 +Subject: sched: Remove forced2_migrations stats +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <9d72ed88de455fe6e14baed99ab4b05d6a047ea5.1283514306.git.efault@gmx.de> + +From: Ingo Molnar + +commit b9889ed1ddeca5a3f3569c8de7354e9e97d803ae upstream + +This build warning: + + kernel/sched.c: In function 'set_task_cpu': + kernel/sched.c:2070: warning: unused variable 'old_rq' + +Made me realize that the forced2_migrations stat looks pretty +pointless (and a misnomer) - remove it. + +Cc: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 1 - + kernel/sched.c | 6 ------ + kernel/sched_debug.c | 2 -- + 3 files changed, 9 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1178,7 +1178,6 @@ struct sched_entity { + u64 nr_failed_migrations_running; + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; +- u64 nr_forced2_migrations; + + u64 nr_wakeups; + u64 nr_wakeups_sync; +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2069,7 +2069,6 @@ task_hot(struct task_struct *p, u64 now, + void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + { + int old_cpu = task_cpu(p); +- struct rq *old_rq = cpu_rq(old_cpu); + struct cfs_rq *old_cfsrq = task_cfs_rq(p), + *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); + +@@ -2077,10 +2076,6 @@ void set_task_cpu(struct task_struct *p, + + if (old_cpu != new_cpu) { + p->se.nr_migrations++; +-#ifdef CONFIG_SCHEDSTATS +- if (task_hot(p, old_rq->clock, NULL)) +- schedstat_inc(p, se.nr_forced2_migrations); +-#endif + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, + 1, 1, NULL, 0); + } +@@ -2523,7 +2518,6 @@ static void __sched_fork(struct task_str + p->se.nr_failed_migrations_running = 0; + p->se.nr_failed_migrations_hot = 0; + p->se.nr_forced_migrations = 0; +- p->se.nr_forced2_migrations = 0; + + p->se.nr_wakeups = 0; + p->se.nr_wakeups_sync = 0; +--- a/kernel/sched_debug.c ++++ b/kernel/sched_debug.c +@@ -423,7 +423,6 @@ void proc_sched_show_task(struct task_st + P(se.nr_failed_migrations_running); + P(se.nr_failed_migrations_hot); + P(se.nr_forced_migrations); +- P(se.nr_forced2_migrations); + P(se.nr_wakeups); + P(se.nr_wakeups_sync); + 
P(se.nr_wakeups_migrate); +@@ -499,7 +498,6 @@ void proc_sched_set_task(struct task_str + p->se.nr_failed_migrations_running = 0; + p->se.nr_failed_migrations_hot = 0; + p->se.nr_forced_migrations = 0; +- p->se.nr_forced2_migrations = 0; + p->se.nr_wakeups = 0; + p->se.nr_wakeups_sync = 0; + p->se.nr_wakeups_migrate = 0; diff --git a/queue-2.6.32/sched-remove-rq-clock-coupling-from-set_task_cpu.patch b/queue-2.6.32/sched-remove-rq-clock-coupling-from-set_task_cpu.patch new file mode 100644 index 00000000000..b7126ccb9c8 --- /dev/null +++ b/queue-2.6.32/sched-remove-rq-clock-coupling-from-set_task_cpu.patch @@ -0,0 +1,59 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:02:16 2010 +From: Peter Zijlstra +Date: Fri, 27 Nov 2009 14:12:25 +0100 +Subject: sched: Remove rq->clock coupling from set_task_cpu() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <345ac4629b04cc360d22cc1ed26bb8986930d383.1283514306.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 5afcdab706d6002cb02b567ba46e650215e694e8 upstream + +set_task_cpu() should be rq invariant and only touch task state, it +currently fails to do so, which opens up a few races, since not all +callers hold both rq->locks. + +Remove the relyance on rq->clock, as any site calling set_task_cpu() +should also do a remote clock update, which should ensure the observed +time between these two cpus is monotonic, as per +kernel/sched_clock.c:sched_clock_remote(). + +Therefore we can simply remove the clock_offset bits and be happy. + +Signed-off-by: Peter Zijlstra +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 13 +------------ + 1 file changed, 1 insertion(+), 12 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2069,23 +2069,12 @@ task_hot(struct task_struct *p, u64 now, + void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + { + int old_cpu = task_cpu(p); +- struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); ++ struct rq *old_rq = cpu_rq(old_cpu); + struct cfs_rq *old_cfsrq = task_cfs_rq(p), + *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); +- u64 clock_offset; +- +- clock_offset = old_rq->clock - new_rq->clock; + + trace_sched_migrate_task(p, new_cpu); + +-#ifdef CONFIG_SCHEDSTATS +- if (p->se.wait_start) +- p->se.wait_start -= clock_offset; +- if (p->se.sleep_start) +- p->se.sleep_start -= clock_offset; +- if (p->se.block_start) +- p->se.block_start -= clock_offset; +-#endif + if (old_cpu != new_cpu) { + p->se.nr_migrations++; + #ifdef CONFIG_SCHEDSTATS diff --git a/queue-2.6.32/sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch b/queue-2.6.32/sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch new file mode 100644 index 00000000000..bda820cf508 --- /dev/null +++ b/queue-2.6.32/sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch @@ -0,0 +1,222 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:05:50 2010 +From: Peter Zijlstra +Date: Wed, 16 Dec 2009 18:04:41 +0100 +Subject: sched: Remove the cfs_rq dependency from set_task_cpu() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <2b12c4cd8419eba24436eeca57930b6a84f787b6.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 88ec22d3edb72b261f8628226cd543589a6d5e1b upstream + +In order to remove the cfs_rq dependency from set_task_cpu() we +need to ensure the task is cfs_rq invariant for all callsites. 
+ +The simple approach is to substract cfs_rq->min_vruntime from +se->vruntime on dequeue, and add cfs_rq->min_vruntime on +enqueue. + +However, this has the downside of breaking FAIR_SLEEPERS since +we loose the old vruntime as we only maintain the relative +position. + +To solve this, we observe that we only migrate runnable tasks, +we do this using deactivate_task(.sleep=0) and +activate_task(.wakeup=0), therefore we can restrain the +min_vruntime invariance to that state. + +The only other case is wakeup balancing, since we want to +maintain the old vruntime we cannot make it relative on dequeue, +but since we don't migrate inactive tasks, we can do so right +before we activate it again. + +This is where we need the new pre-wakeup hook, we need to call +this while still holding the old rq->lock. We could fold it into +->select_task_rq(), but since that has multiple callsites and +would obfuscate the locking requirements, that seems like a +fudge. + +This leaves the fork() case, simply make sure that ->task_fork() +leaves the ->vruntime in a relative state. + +This covers all cases where set_task_cpu() gets called, and +ensures it sees a relative vruntime. + +Signed-off-by: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091216170518.191697025@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 2 +- + kernel/sched.c | 6 +----- + kernel/sched_fair.c | 50 ++++++++++++++++++++++++++++++++++++++++++++------ + 3 files changed, 46 insertions(+), 12 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1118,7 +1118,7 @@ struct sched_class { + struct task_struct *task); + + #ifdef CONFIG_FAIR_GROUP_SCHED +- void (*moved_group) (struct task_struct *p); ++ void (*moved_group) (struct task_struct *p, int on_rq); + #endif + }; + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2063,8 +2063,6 @@ task_hot(struct task_struct *p, u64 now, + void set_task_cpu(struct task_struct *p, unsigned int new_cpu) + { + int old_cpu = task_cpu(p); +- struct cfs_rq *old_cfsrq = task_cfs_rq(p), +- *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); + + #ifdef CONFIG_SCHED_DEBUG + /* +@@ -2082,8 +2080,6 @@ void set_task_cpu(struct task_struct *p, + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, + 1, 1, NULL, 0); + } +- p->se.vruntime -= old_cfsrq->min_vruntime - +- new_cfsrq->min_vruntime; + + __set_task_cpu(p, new_cpu); + } +@@ -10144,7 +10140,7 @@ void sched_move_task(struct task_struct + + #ifdef CONFIG_FAIR_GROUP_SCHED + if (tsk->sched_class->moved_group) +- tsk->sched_class->moved_group(tsk); ++ tsk->sched_class->moved_group(tsk, on_rq); + #endif + + if (unlikely(running)) +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -488,6 +488,7 @@ __update_curr(struct cfs_rq *cfs_rq, str + curr->sum_exec_runtime += delta_exec; + schedstat_add(cfs_rq, exec_clock, delta_exec); + delta_exec_weighted = calc_delta_fair(delta_exec, curr); ++ + curr->vruntime += delta_exec_weighted; + update_min_vruntime(cfs_rq); + } +@@ -743,16 +744,26 @@ place_entity(struct cfs_rq *cfs_rq, stru + se->vruntime = vruntime; + } + ++#define ENQUEUE_WAKEUP 1 ++#define ENQUEUE_MIGRATE 2 ++ + static void +-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) ++enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) + { + /* ++ * Update the normalized vruntime before updating min_vruntime ++ * through callig update_curr(). 
++ */ ++ if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE)) ++ se->vruntime += cfs_rq->min_vruntime; ++ ++ /* + * Update run-time statistics of the 'current'. + */ + update_curr(cfs_rq); + account_entity_enqueue(cfs_rq, se); + +- if (wakeup) { ++ if (flags & ENQUEUE_WAKEUP) { + place_entity(cfs_rq, se, 0); + enqueue_sleeper(cfs_rq, se); + } +@@ -806,6 +817,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, st + __dequeue_entity(cfs_rq, se); + account_entity_dequeue(cfs_rq, se); + update_min_vruntime(cfs_rq); ++ ++ /* ++ * Normalize the entity after updating the min_vruntime because the ++ * update can refer to the ->curr item and we need to reflect this ++ * movement in our normalized position. ++ */ ++ if (!sleep) ++ se->vruntime -= cfs_rq->min_vruntime; + } + + /* +@@ -1016,13 +1035,19 @@ static void enqueue_task_fair(struct rq + { + struct cfs_rq *cfs_rq; + struct sched_entity *se = &p->se; ++ int flags = 0; ++ ++ if (wakeup) ++ flags |= ENQUEUE_WAKEUP; ++ if (p->state == TASK_WAKING) ++ flags |= ENQUEUE_MIGRATE; + + for_each_sched_entity(se) { + if (se->on_rq) + break; + cfs_rq = cfs_rq_of(se); +- enqueue_entity(cfs_rq, se, wakeup); +- wakeup = 1; ++ enqueue_entity(cfs_rq, se, flags); ++ flags = ENQUEUE_WAKEUP; + } + + hrtick_update(rq); +@@ -1098,6 +1123,14 @@ static void yield_task_fair(struct rq *r + + #ifdef CONFIG_SMP + ++static void task_waking_fair(struct rq *rq, struct task_struct *p) ++{ ++ struct sched_entity *se = &p->se; ++ struct cfs_rq *cfs_rq = cfs_rq_of(se); ++ ++ se->vruntime -= cfs_rq->min_vruntime; ++} ++ + #ifdef CONFIG_FAIR_GROUP_SCHED + /* + * effective_load() calculates the load change as seen from the root_task_group +@@ -1943,6 +1976,8 @@ static void task_fork_fair(struct task_s + resched_task(rq->curr); + } + ++ se->vruntime -= cfs_rq->min_vruntime; ++ + spin_unlock_irqrestore(&rq->lock, flags); + } + +@@ -1996,12 +2031,13 @@ static void set_curr_task_fair(struct rq + } + + #ifdef CONFIG_FAIR_GROUP_SCHED +-static void moved_group_fair(struct task_struct *p) ++static void moved_group_fair(struct task_struct *p, int on_rq) + { + struct cfs_rq *cfs_rq = task_cfs_rq(p); + + update_curr(cfs_rq); +- place_entity(cfs_rq, &p->se, 1); ++ if (!on_rq) ++ place_entity(cfs_rq, &p->se, 1); + } + #endif + +@@ -2041,6 +2077,8 @@ static const struct sched_class fair_sch + .move_one_task = move_one_task_fair, + .rq_online = rq_online_fair, + .rq_offline = rq_offline_fair, ++ ++ .task_waking = task_waking_fair, + #endif + + .set_curr_task = set_curr_task_fair, diff --git a/queue-2.6.32/sched-remove-unused-cpu_nr_migrations.patch b/queue-2.6.32/sched-remove-unused-cpu_nr_migrations.patch new file mode 100644 index 00000000000..fb922e39158 --- /dev/null +++ b/queue-2.6.32/sched-remove-unused-cpu_nr_migrations.patch @@ -0,0 +1,69 @@ +From h-shimamoto@ct.jp.nec.com Thu Sep 16 14:02:02 2010 +From: Hiroshi Shimamoto +Date: Wed, 4 Nov 2009 16:16:54 +0900 +Subject: sched: Remove unused cpu_nr_migrations() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <2138ef8909e9abf67502990b21f16d6f078ed83f.1283514306.git.efault@gmx.de> + +From: Hiroshi Shimamoto + +commit 9824a2b728b63e7ff586b9fd9293c819be79f0f3 upstream + +cpu_nr_migrations() is not used, remove it. 
+ +Signed-off-by: Hiroshi Shimamoto +Cc: Peter Zijlstra +LKML-Reference: <4AF12A66.6020609@ct.jp.nec.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 1 - + kernel/sched.c | 11 ----------- + 2 files changed, 12 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void) + + + extern void calc_global_load(void); +-extern u64 cpu_nr_migrations(int cpu); + + extern unsigned long get_parent_ip(unsigned long addr); + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -542,7 +542,6 @@ struct rq { + struct load_weight load; + unsigned long nr_load_updates; + u64 nr_switches; +- u64 nr_migrations_in; + + struct cfs_rq cfs; + struct rt_rq rt; +@@ -2089,7 +2088,6 @@ void set_task_cpu(struct task_struct *p, + #endif + if (old_cpu != new_cpu) { + p->se.nr_migrations++; +- new_rq->nr_migrations_in++; + #ifdef CONFIG_SCHEDSTATS + if (task_hot(p, old_rq->clock, NULL)) + schedstat_inc(p, se.nr_forced2_migrations); +@@ -3048,15 +3046,6 @@ static void calc_load_account_active(str + } + + /* +- * Externally visible per-cpu scheduler statistics: +- * cpu_nr_migrations(cpu) - number of migrations into that cpu +- */ +-u64 cpu_nr_migrations(int cpu) +-{ +- return cpu_rq(cpu)->nr_migrations_in; +-} +- +-/* + * Update rq->cpu_load[] statistics. This function is usually called every + * scheduler tick (TICK_NSEC). + */ diff --git a/queue-2.6.32/sched-sanitize-fork-handling.patch b/queue-2.6.32/sched-sanitize-fork-handling.patch new file mode 100644 index 00000000000..1a9aeb6c7a8 --- /dev/null +++ b/queue-2.6.32/sched-sanitize-fork-handling.patch @@ -0,0 +1,178 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:02:42 2010 +From: Peter Zijlstra +Date: Fri, 27 Nov 2009 17:32:46 +0100 +Subject: sched: Sanitize fork() handling +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <813f92ca73a6ad9adc923c0d8b5bc346429452d4.1283514306.git.efault@gmx.de> + +From: Peter Zijlstra + +commit cd29fe6f2637cc2ccbda5ac65f5332d6bf5fa3c6 upstream + +Currently we try to do task placement in wake_up_new_task() after we do +the load-balance pass in sched_fork(). This yields complicated semantics +in that we have to deal with tasks on different RQs and the +set_task_cpu() calls in copy_process() and sched_fork() + +Rename ->task_new() to ->task_fork() and call it from sched_fork() +before the balancing, this gives the policy a clear point to place the +task. 
+ +Signed-off-by: Peter Zijlstra +LKML-Reference: +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/sched.h | 2 +- + kernel/sched.c | 43 ++++++++++++++++++------------------------- + kernel/sched_fair.c | 28 +++++++++++++++------------- + 3 files changed, 34 insertions(+), 39 deletions(-) + +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1104,7 +1104,7 @@ struct sched_class { + + void (*set_curr_task) (struct rq *rq); + void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); +- void (*task_new) (struct rq *rq, struct task_struct *p); ++ void (*task_fork) (struct task_struct *p); + + void (*switched_from) (struct rq *this_rq, struct task_struct *task, + int running); +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -1821,6 +1821,20 @@ static void cfs_rq_set_shares(struct cfs + static void calc_load_account_active(struct rq *this_rq); + static void update_sysctl(void); + ++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) ++{ ++ set_task_rq(p, cpu); ++#ifdef CONFIG_SMP ++ /* ++ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be ++ * successfuly executed on another CPU. We must ensure that updates of ++ * per-task data have been completed by this moment. ++ */ ++ smp_wmb(); ++ task_thread_info(p)->cpu = cpu; ++#endif ++} ++ + #include "sched_stats.h" + #include "sched_idletask.c" + #include "sched_fair.c" +@@ -1977,20 +1991,6 @@ inline int task_curr(const struct task_s + return cpu_curr(task_cpu(p)) == p; + } + +-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) +-{ +- set_task_rq(p, cpu); +-#ifdef CONFIG_SMP +- /* +- * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be +- * successfuly executed on another CPU. We must ensure that updates of +- * per-task data have been completed by this moment. +- */ +- smp_wmb(); +- task_thread_info(p)->cpu = cpu; +-#endif +-} +- + static inline void check_class_changed(struct rq *rq, struct task_struct *p, + const struct sched_class *prev_class, + int oldprio, int running) +@@ -2593,6 +2593,9 @@ void sched_fork(struct task_struct *p, i + if (!rt_prio(p->prio)) + p->sched_class = &fair_sched_class; + ++ if (p->sched_class->task_fork) ++ p->sched_class->task_fork(p); ++ + #ifdef CONFIG_SMP + cpu = select_task_rq(p, SD_BALANCE_FORK, 0); + #endif +@@ -2629,17 +2632,7 @@ void wake_up_new_task(struct task_struct + rq = task_rq_lock(p, &flags); + BUG_ON(p->state != TASK_RUNNING); + update_rq_clock(rq); +- +- if (!p->sched_class->task_new || !current->se.on_rq) { +- activate_task(rq, p, 0); +- } else { +- /* +- * Let the scheduling class do new task startup +- * management (if any): +- */ +- p->sched_class->task_new(rq, p); +- inc_nr_running(rq); +- } ++ activate_task(rq, p, 0); + trace_sched_wakeup_new(rq, p, 1); + check_preempt_curr(rq, p, WF_FORK); + #ifdef CONFIG_SMP +--- a/kernel/sched_fair.c ++++ b/kernel/sched_fair.c +@@ -1911,28 +1911,30 @@ static void task_tick_fair(struct rq *rq + } + + /* +- * Share the fairness runtime between parent and child, thus the +- * total amount of pressure for CPU stays equal - new tasks +- * get a chance to run but frequent forkers are not allowed to +- * monopolize the CPU. Note: the parent runqueue is locked, +- * the child is not running yet. 
++ * called on fork with the child task as argument from the parent's context ++ * - child not yet on the tasklist ++ * - preemption disabled + */ +-static void task_new_fair(struct rq *rq, struct task_struct *p) ++static void task_fork_fair(struct task_struct *p) + { +- struct cfs_rq *cfs_rq = task_cfs_rq(p); ++ struct cfs_rq *cfs_rq = task_cfs_rq(current); + struct sched_entity *se = &p->se, *curr = cfs_rq->curr; + int this_cpu = smp_processor_id(); ++ struct rq *rq = this_rq(); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&rq->lock, flags); + +- sched_info_queued(p); ++ if (unlikely(task_cpu(p) != this_cpu)) ++ __set_task_cpu(p, this_cpu); + + update_curr(cfs_rq); ++ + if (curr) + se->vruntime = curr->vruntime; + place_entity(cfs_rq, se, 1); + +- /* 'curr' will be NULL if the child belongs to a different group */ +- if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && +- curr && entity_before(curr, se)) { ++ if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) { + /* + * Upon rescheduling, sched_class::put_prev_task() will place + * 'current' within the tree based on its new key value. +@@ -1941,7 +1943,7 @@ static void task_new_fair(struct rq *rq, + resched_task(rq->curr); + } + +- enqueue_task_fair(rq, p, 0); ++ spin_unlock_irqrestore(&rq->lock, flags); + } + + /* +@@ -2043,7 +2045,7 @@ static const struct sched_class fair_sch + + .set_curr_task = set_curr_task_fair, + .task_tick = task_tick_fair, +- .task_new = task_new_fair, ++ .task_fork = task_fork_fair, + + .prio_changed = prio_changed_fair, + .switched_to = switched_to_fair, diff --git a/queue-2.6.32/sched-use-rcu-in-sched_get-set_affinity.patch b/queue-2.6.32/sched-use-rcu-in-sched_get-set_affinity.patch new file mode 100644 index 00000000000..0c38609f997 --- /dev/null +++ b/queue-2.6.32/sched-use-rcu-in-sched_get-set_affinity.patch @@ -0,0 +1,77 @@ +From tglx@linutronix.de Thu Sep 16 14:03:36 2010 +From: Thomas Gleixner +Date: Wed, 9 Dec 2009 10:15:01 +0000 +Subject: sched: Use rcu in sched_get/set_affinity() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <9939fdaefe12d123e26bdbf51b8b502aca64ae42.1283514306.git.efault@gmx.de> + +From: Thomas Gleixner + +commit 23f5d142519621b16cf2b378cf8adf4dcf01a616 upstream + +tasklist_lock is held read locked to protect the +find_task_by_vpid() call and to prevent the task going away. +sched_setaffinity acquires a task struct ref and drops tasklist +lock right away. The access to the cpus_allowed mask is +protected by rq->lock. + +rcu_read_lock() provides the same protection here. + +Signed-off-by: Thomas Gleixner +Cc: Peter Zijlstra +LKML-Reference: <20091209100706.789059966@linutronix.de> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -6563,22 +6563,18 @@ long sched_setaffinity(pid_t pid, const + int retval; + + get_online_cpus(); +- read_lock(&tasklist_lock); ++ rcu_read_lock(); + + p = find_process_by_pid(pid); + if (!p) { +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + put_online_cpus(); + return -ESRCH; + } + +- /* +- * It is not safe to call set_cpus_allowed with the +- * tasklist_lock held. We will bump the task_struct's +- * usage count and then drop tasklist_lock. 
+- */ ++ /* Prevent p going away */ + get_task_struct(p); +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + + if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { + retval = -ENOMEM; +@@ -6664,7 +6660,7 @@ long sched_getaffinity(pid_t pid, struct + int retval; + + get_online_cpus(); +- read_lock(&tasklist_lock); ++ rcu_read_lock(); + + retval = -ESRCH; + p = find_process_by_pid(pid); +@@ -6680,7 +6676,7 @@ long sched_getaffinity(pid_t pid, struct + task_rq_unlock(rq, &flags); + + out_unlock: +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + put_online_cpus(); + + return retval; diff --git a/queue-2.6.32/sched-use-rcu-in-sched_get_rr_param.patch b/queue-2.6.32/sched-use-rcu-in-sched_get_rr_param.patch new file mode 100644 index 00000000000..8b13a2fd3d4 --- /dev/null +++ b/queue-2.6.32/sched-use-rcu-in-sched_get_rr_param.patch @@ -0,0 +1,60 @@ +From tglx@linutronix.de Thu Sep 16 14:03:50 2010 +From: Thomas Gleixner +Date: Wed, 9 Dec 2009 10:15:11 +0000 +Subject: sched: Use rcu in sched_get_rr_param() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <05d6447642d29f8934b54889b15fd010680d3b57.1283514306.git.efault@gmx.de> + +From: Thomas Gleixner + +commit 1a551ae715825bb2a2107a2dd68de024a1fa4e32 upstream + +read_lock(&tasklist_lock) does not protect +sys_sched_get_rr_param() against a concurrent update of the +policy or scheduler parameters as do_sched_scheduler() does not +take the tasklist_lock. + +The access to task->sched_class->get_rr_interval is protected by +task_rq_lock(task). + +Use rcu_read_lock() to protect find_task_by_vpid() and prevent +the task struct from going away. + +Signed-off-by: Thomas Gleixner +Cc: Peter Zijlstra +LKML-Reference: <20091209100706.862897167@linutronix.de> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -6924,7 +6924,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p + return -EINVAL; + + retval = -ESRCH; +- read_lock(&tasklist_lock); ++ rcu_read_lock(); + p = find_process_by_pid(pid); + if (!p) + goto out_unlock; +@@ -6937,13 +6937,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, p + time_slice = p->sched_class->get_rr_interval(rq, p); + task_rq_unlock(rq, &flags); + +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + jiffies_to_timespec(time_slice, &t); + retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; + return retval; + + out_unlock: +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + return retval; + } + diff --git a/queue-2.6.32/sched-use-rcu-in-sys_sched_getscheduler-sys_sched_getparam.patch b/queue-2.6.32/sched-use-rcu-in-sys_sched_getscheduler-sys_sched_getparam.patch new file mode 100644 index 00000000000..6da76f50d44 --- /dev/null +++ b/queue-2.6.32/sched-use-rcu-in-sys_sched_getscheduler-sys_sched_getparam.patch @@ -0,0 +1,80 @@ +From tglx@linutronix.de Thu Sep 16 14:03:22 2010 +From: Thomas Gleixner +Date: Wed, 9 Dec 2009 10:14:58 +0000 +Subject: sched: Use rcu in sys_sched_getscheduler/sys_sched_getparam() +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: + +From: Thomas Gleixner + +commit 5fe85be081edf0ac92d83f9c39e0ab5c1371eb82 upstream + +read_lock(&tasklist_lock) does not protect +sys_sched_getscheduler and sys_sched_getparam() against a +concurrent update of the policy or scheduler parameters as +do_sched_setscheduler() does not take the tasklist_lock. 
The +accessed integers can be retrieved w/o locking and are snapshots +anyway. + +Using rcu_read_lock() to protect find_task_by_vpid() and prevent +the task struct from going away is not changing the above +situation. + +Signed-off-by: Thomas Gleixner +Cc: Peter Zijlstra +LKML-Reference: <20091209100706.753790977@linutronix.de> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -6505,7 +6505,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_ + return -EINVAL; + + retval = -ESRCH; +- read_lock(&tasklist_lock); ++ rcu_read_lock(); + p = find_process_by_pid(pid); + if (p) { + retval = security_task_getscheduler(p); +@@ -6513,7 +6513,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_ + retval = p->policy + | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); + } +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + return retval; + } + +@@ -6531,7 +6531,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, p + if (!param || pid < 0) + return -EINVAL; + +- read_lock(&tasklist_lock); ++ rcu_read_lock(); + p = find_process_by_pid(pid); + retval = -ESRCH; + if (!p) +@@ -6542,7 +6542,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, p + goto out_unlock; + + lp.sched_priority = p->rt_priority; +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + + /* + * This one might sleep, we cannot do it with a spinlock held ... +@@ -6552,7 +6552,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, p + return retval; + + out_unlock: +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + return retval; + } + diff --git a/queue-2.6.32/sched-use-task_waking-for-fork-wakups.patch b/queue-2.6.32/sched-use-task_waking-for-fork-wakups.patch new file mode 100644 index 00000000000..df9bc15d2d9 --- /dev/null +++ b/queue-2.6.32/sched-use-task_waking-for-fork-wakups.patch @@ -0,0 +1,72 @@ +From a.p.zijlstra@chello.nl Thu Sep 16 14:04:16 2010 +From: Peter Zijlstra +Date: Wed, 16 Dec 2009 18:04:35 +0100 +Subject: sched: Use TASK_WAKING for fork wakups +To: stable +Cc: Ingo Molnar , Peter Zijlstra , Greg KH +Message-ID: <6745f030761d6cd4b79f13d12c5dee74d2e940fa.1283514307.git.efault@gmx.de> + +From: Peter Zijlstra + +commit 06b83b5fbea273672822b6ee93e16781046553ec upstream + +For later convenience use TASK_WAKING for fresh tasks. + +Signed-off-by: Peter Zijlstra +Cc: Mike Galbraith +LKML-Reference: <20091216170517.732561278@chello.nl> +Signed-off-by: Ingo Molnar +Signed-off-by: Mike Galbraith +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/kernel/sched.c ++++ b/kernel/sched.c +@@ -2538,14 +2538,6 @@ static void __sched_fork(struct task_str + #ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&p->preempt_notifiers); + #endif +- +- /* +- * We mark the process as running here, but have not actually +- * inserted it onto the runqueue yet. This guarantees that +- * nobody will actually run it, and a signal or other external +- * event cannot wake it up and insert it on the runqueue either. +- */ +- p->state = TASK_RUNNING; + } + + /* +@@ -2556,6 +2548,12 @@ void sched_fork(struct task_struct *p, i + int cpu = get_cpu(); + + __sched_fork(p); ++ /* ++ * We mark the process as waking here. This guarantees that ++ * nobody will actually run it, and a signal or other external ++ * event cannot wake it up and insert it on the runqueue either. 
++ */ ++ p->state = TASK_WAKING; + + /* + * Revert to default priority/policy on fork if requested. +@@ -2624,7 +2622,8 @@ void wake_up_new_task(struct task_struct + struct rq *rq; + + rq = task_rq_lock(p, &flags); +- BUG_ON(p->state != TASK_RUNNING); ++ BUG_ON(p->state != TASK_WAKING); ++ p->state = TASK_RUNNING; + update_rq_clock(rq); + activate_task(rq, p, 0); + trace_sched_wakeup_new(rq, p, 1); +@@ -7034,6 +7033,7 @@ void __cpuinit init_idle(struct task_str + spin_lock_irqsave(&rq->lock, flags); + + __sched_fork(idle); ++ idle->state = TASK_RUNNING; + idle->se.exec_start = sched_clock(); + + cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); diff --git a/queue-2.6.32/series b/queue-2.6.32/series index eed3181697b..da0d77b7fe0 100644 --- a/queue-2.6.32/series +++ b/queue-2.6.32/series @@ -62,3 +62,26 @@ x86-tsc-fix-a-preemption-leak-in-restore_sched_clock_state.patch x86-64-compat-test-rax-for-the-syscall-number-not-eax.patch compat-make-compat_alloc_user_space-incorporate-the-access_ok.patch x86-64-compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch +sched-protect-task-cpus_allowed-access-in-sched_getaffinity.patch +sched-protect-sched_rr_get_param-access-to-task-sched_class.patch +sched-consolidate-select_task_rq-callers.patch +sched-remove-unused-cpu_nr_migrations.patch +sched-remove-rq-clock-coupling-from-set_task_cpu.patch +sched-clean-up-ttwu-rq-locking.patch +sched-sanitize-fork-handling.patch +sched-remove-forced2_migrations-stats.patch +sched-make-wakeup-side-and-atomic-variants-of-completion-api-irq-safe.patch +sched-use-rcu-in-sys_sched_getscheduler-sys_sched_getparam.patch +sched-use-rcu-in-sched_get-set_affinity.patch +sched-use-rcu-in-sched_get_rr_param.patch +sched-fix-set_cpu_active-in-cpu_down.patch +sched-use-task_waking-for-fork-wakups.patch +sched-ensure-set_task_cpu-is-never-called-on-blocked-tasks.patch +sched-make-warning-less-noisy.patch +sched-fix-broken-assertion.patch +sched-fix-sched_exec-balancing.patch +sched-fix-select_task_rq-vs-hotplug-issues.patch +sched-add-pre-and-post-wakeup-hooks.patch +sched-remove-the-cfs_rq-dependency-from-set_task_cpu.patch +sched-fix-hotplug-hang.patch +sched-fix-fork-vs-hotplug-vs-cpuset-namespaces.patch