From: Greg Kroah-Hartman Date: Mon, 13 Jan 2014 18:28:27 +0000 (-0800) Subject: 3.4-stable patches X-Git-Tag: v3.4.77~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=721818c98ad3df5d9cb6aae0a059fa691e77b424;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: sched-fix-cfs_bandwidth-misuse-of-hrtimer_expires_remaining.patch sched-fix-hrtimer_cancel-rq-lock-deadlock.patch sched-fix-race-on-toggling-cfs_bandwidth_used.patch sched-guarantee-new-group-entities-always-have-weight.patch --- diff --git a/queue-3.4/sched-fix-cfs_bandwidth-misuse-of-hrtimer_expires_remaining.patch b/queue-3.4/sched-fix-cfs_bandwidth-misuse-of-hrtimer_expires_remaining.patch new file mode 100644 index 00000000000..a5af1b91bc4 --- /dev/null +++ b/queue-3.4/sched-fix-cfs_bandwidth-misuse-of-hrtimer_expires_remaining.patch @@ -0,0 +1,57 @@ +From db06e78cc13d70f10877e0557becc88ab3ad2be8 Mon Sep 17 00:00:00 2001 +From: Ben Segall +Date: Wed, 16 Oct 2013 11:16:17 -0700 +Subject: sched: Fix cfs_bandwidth misuse of hrtimer_expires_remaining + +From: Ben Segall + +commit db06e78cc13d70f10877e0557becc88ab3ad2be8 upstream. + +hrtimer_expires_remaining does not take internal hrtimer locks and thus +must be guarded against concurrent __hrtimer_start_range_ns (but +returning HRTIMER_RESTART is safe). Use cfs_b->lock to make it safe. + +Signed-off-by: Ben Segall +Signed-off-by: Peter Zijlstra +Cc: pjt@google.com +Link: http://lkml.kernel.org/r/20131016181617.22647.73829.stgit@sword-of-the-dawn.mtv.corp.google.com +Signed-off-by: Ingo Molnar +Cc: Chris J Arges +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/fair.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -1831,7 +1831,13 @@ static const u64 min_bandwidth_expiratio + /* how long we wait to gather additional slack before distributing */ + static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC; + +-/* are we near the end of the current quota period? */ ++/* ++ * Are we near the end of the current quota period? ++ * ++ * Requires cfs_b->lock for hrtimer_expires_remaining to be safe against the ++ * hrtimer base being cleared by __hrtimer_start_range_ns. In the case of ++ * migrate_hrtimers, base is never cleared, so we are fine. ++ */ + static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire) + { + struct hrtimer *refresh_timer = &cfs_b->period_timer; +@@ -1907,10 +1913,12 @@ static void do_sched_cfs_slack_timer(str + u64 expires; + + /* confirm we're still not at a refresh boundary */ +- if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) ++ raw_spin_lock(&cfs_b->lock); ++ if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { ++ raw_spin_unlock(&cfs_b->lock); + return; ++ } + +- raw_spin_lock(&cfs_b->lock); + if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) { + runtime = cfs_b->runtime; + cfs_b->runtime = 0; diff --git a/queue-3.4/sched-fix-hrtimer_cancel-rq-lock-deadlock.patch b/queue-3.4/sched-fix-hrtimer_cancel-rq-lock-deadlock.patch new file mode 100644 index 00000000000..d7cbf97073f --- /dev/null +++ b/queue-3.4/sched-fix-hrtimer_cancel-rq-lock-deadlock.patch @@ -0,0 +1,63 @@ +From 927b54fccbf04207ec92f669dce6806848cbec7d Mon Sep 17 00:00:00 2001 +From: Ben Segall +Date: Wed, 16 Oct 2013 11:16:22 -0700 +Subject: sched: Fix hrtimer_cancel()/rq->lock deadlock + +From: Ben Segall + +commit 927b54fccbf04207ec92f669dce6806848cbec7d upstream. 
+ +__start_cfs_bandwidth calls hrtimer_cancel while holding rq->lock, +waiting for the hrtimer to finish. However, if sched_cfs_period_timer +runs for another loop iteration, the hrtimer can attempt to take +rq->lock, resulting in deadlock. + +Fix this by ensuring that cfs_b->timer_active is cleared only if the +_latest_ call to do_sched_cfs_period_timer is returning as idle. Then +__start_cfs_bandwidth can just call hrtimer_try_to_cancel and wait for +that to succeed or timer_active == 1. + +Signed-off-by: Ben Segall +Signed-off-by: Peter Zijlstra +Cc: pjt@google.com +Link: http://lkml.kernel.org/r/20131016181622.22647.16643.stgit@sword-of-the-dawn.mtv.corp.google.com +Signed-off-by: Ingo Molnar +Cc: Chris J Arges +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/fair.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -1771,6 +1771,13 @@ static int do_sched_cfs_period_timer(str + if (idle) + goto out_unlock; + ++ /* ++ * if we have relooped after returning idle once, we need to update our ++ * status as actually running, so that other cpus doing ++ * __start_cfs_bandwidth will stop trying to cancel us. ++ */ ++ cfs_b->timer_active = 1; ++ + __refill_cfs_bandwidth_runtime(cfs_b); + + if (!throttled) { +@@ -2043,11 +2050,11 @@ void __start_cfs_bandwidth(struct cfs_ba + * (timer_active==0 becomes visible before the hrtimer call-back + * terminates). In either case we ensure that it's re-programmed + */ +- while (unlikely(hrtimer_active(&cfs_b->period_timer))) { ++ while (unlikely(hrtimer_active(&cfs_b->period_timer)) && ++ hrtimer_try_to_cancel(&cfs_b->period_timer) < 0) { ++ /* bounce the lock to allow do_sched_cfs_period_timer to run */ + raw_spin_unlock(&cfs_b->lock); +- /* ensure cfs_b->lock is available while we wait */ +- hrtimer_cancel(&cfs_b->period_timer); +- ++ cpu_relax(); + raw_spin_lock(&cfs_b->lock); + /* if someone else restarted the timer then we're done */ + if (cfs_b->timer_active) diff --git a/queue-3.4/sched-fix-race-on-toggling-cfs_bandwidth_used.patch b/queue-3.4/sched-fix-race-on-toggling-cfs_bandwidth_used.patch new file mode 100644 index 00000000000..0fc6cab4ed9 --- /dev/null +++ b/queue-3.4/sched-fix-race-on-toggling-cfs_bandwidth_used.patch @@ -0,0 +1,105 @@ +From 1ee14e6c8cddeeb8a490d7b54cd9016e4bb900b4 Mon Sep 17 00:00:00 2001 +From: Ben Segall +Date: Wed, 16 Oct 2013 11:16:12 -0700 +Subject: sched: Fix race on toggling cfs_bandwidth_used + +From: Ben Segall + +commit 1ee14e6c8cddeeb8a490d7b54cd9016e4bb900b4 upstream. + +When we transition cfs_bandwidth_used to false, any currently +throttled groups will incorrectly return false from cfs_rq_throttled. +While tg_set_cfs_bandwidth will unthrottle them eventually, currently +running code (including at least dequeue_task_fair and +distribute_cfs_runtime) will cause errors. + +Fix this by turning off cfs_bandwidth_used only after unthrottling all +cfs_rqs. + +Tested: toggle bandwidth back and forth on a loaded cgroup. Caused +crashes in minutes without the patch, hasn't crashed with it. 
+ +Signed-off-by: Ben Segall +Signed-off-by: Peter Zijlstra +Cc: pjt@google.com +Link: http://lkml.kernel.org/r/20131016181611.22647.80365.stgit@sword-of-the-dawn.mtv.corp.google.com +Signed-off-by: Ingo Molnar +Cc: Chris J Arges +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/core.c | 9 ++++++++- + kernel/sched/fair.c | 16 +++++++++------- + kernel/sched/sched.h | 3 ++- + 3 files changed, 19 insertions(+), 9 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -7906,7 +7906,12 @@ static int tg_set_cfs_bandwidth(struct t + + runtime_enabled = quota != RUNTIME_INF; + runtime_was_enabled = cfs_b->quota != RUNTIME_INF; +- account_cfs_bandwidth_used(runtime_enabled, runtime_was_enabled); ++ /* ++ * If we need to toggle cfs_bandwidth_used, off->on must occur ++ * before making related changes, and on->off must occur afterwards ++ */ ++ if (runtime_enabled && !runtime_was_enabled) ++ cfs_bandwidth_usage_inc(); + raw_spin_lock_irq(&cfs_b->lock); + cfs_b->period = ns_to_ktime(period); + cfs_b->quota = quota; +@@ -7932,6 +7937,8 @@ static int tg_set_cfs_bandwidth(struct t + unthrottle_cfs_rq(cfs_rq); + raw_spin_unlock_irq(&rq->lock); + } ++ if (runtime_was_enabled && !runtime_enabled) ++ cfs_bandwidth_usage_dec(); + out_unlock: + mutex_unlock(&cfs_constraints_mutex); + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -1393,13 +1393,14 @@ static inline bool cfs_bandwidth_used(vo + return static_key_false(&__cfs_bandwidth_used); + } + +-void account_cfs_bandwidth_used(int enabled, int was_enabled) ++void cfs_bandwidth_usage_inc(void) + { +- /* only need to count groups transitioning between enabled/!enabled */ +- if (enabled && !was_enabled) +- static_key_slow_inc(&__cfs_bandwidth_used); +- else if (!enabled && was_enabled) +- static_key_slow_dec(&__cfs_bandwidth_used); ++ static_key_slow_inc(&__cfs_bandwidth_used); ++} ++ ++void cfs_bandwidth_usage_dec(void) ++{ ++ static_key_slow_dec(&__cfs_bandwidth_used); + } + #else /* HAVE_JUMP_LABEL */ + static bool cfs_bandwidth_used(void) +@@ -1407,7 +1408,8 @@ static bool cfs_bandwidth_used(void) + return true; + } + +-void account_cfs_bandwidth_used(int enabled, int was_enabled) {} ++void cfs_bandwidth_usage_inc(void) {} ++void cfs_bandwidth_usage_dec(void) {} + #endif /* HAVE_JUMP_LABEL */ + + /* +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -1140,7 +1140,8 @@ extern void init_cfs_rq(struct cfs_rq *c + extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); + extern void unthrottle_offline_cfs_rqs(struct rq *rq); + +-extern void account_cfs_bandwidth_used(int enabled, int was_enabled); ++extern void cfs_bandwidth_usage_inc(void); ++extern void cfs_bandwidth_usage_dec(void); + + #ifdef CONFIG_NO_HZ + enum rq_nohz_flag_bits { diff --git a/queue-3.4/sched-guarantee-new-group-entities-always-have-weight.patch b/queue-3.4/sched-guarantee-new-group-entities-always-have-weight.patch new file mode 100644 index 00000000000..301035a405c --- /dev/null +++ b/queue-3.4/sched-guarantee-new-group-entities-always-have-weight.patch @@ -0,0 +1,61 @@ +From 0ac9b1c21874d2490331233b3242085f8151e166 Mon Sep 17 00:00:00 2001 +From: Paul Turner +Date: Wed, 16 Oct 2013 11:16:27 -0700 +Subject: sched: Guarantee new group-entities always have weight + +From: Paul Turner + +commit 0ac9b1c21874d2490331233b3242085f8151e166 upstream. + +Currently, group entity load-weights are initialized to zero. This +admits some races with respect to the first time they are re-weighted in +earlty use. 
( Let g[x] denote the se for "g" on cpu "x". ) + +Suppose that we have root->a and that a enters a throttled state, +immediately followed by a[0]->t1 (the only task running on cpu[0]) +blocking: + + put_prev_task(group_cfs_rq(a[0]), t1) + put_prev_entity(..., t1) + check_cfs_rq_runtime(group_cfs_rq(a[0])) + throttle_cfs_rq(group_cfs_rq(a[0])) + +Then, before unthrottling occurs, let a[0]->b[0]->t2 wake for the first +time: + + enqueue_task_fair(rq[0], t2) + enqueue_entity(group_cfs_rq(b[0]), t2) + enqueue_entity_load_avg(group_cfs_rq(b[0]), t2) + account_entity_enqueue(group_cfs_ra(b[0]), t2) + update_cfs_shares(group_cfs_rq(b[0])) + < skipped because b is part of a throttled hierarchy > + enqueue_entity(group_cfs_rq(a[0]), b[0]) + ... + +We now have b[0] enqueued, yet group_cfs_rq(a[0])->load.weight == 0 +which violates invariants in several code-paths. Eliminate the +possibility of this by initializing group entity weight. + +Signed-off-by: Paul Turner +Signed-off-by: Peter Zijlstra +Link: http://lkml.kernel.org/r/20131016181627.22647.47543.stgit@sword-of-the-dawn.mtv.corp.google.com +Signed-off-by: Ingo Molnar +Cc: Chris J Arges +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/fair.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -5470,7 +5470,8 @@ void init_tg_cfs_entry(struct task_group + se->cfs_rq = parent->my_q; + + se->my_q = cfs_rq; +- update_load_set(&se->load, 0); ++ /* guarantee group entities always have weight */ ++ update_load_set(&se->load, NICE_0_LOAD); + se->parent = parent; + } + diff --git a/queue-3.4/series b/queue-3.4/series index c859ae59d94..a39bf52d053 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -21,3 +21,7 @@ bridge-use-spin_lock_bh-in-br_multicast_set_hash_max.patch arm-fix-bad-mode-in-...-handler-message-for-undefined-instructions.patch arm-shmobile-mackerel-fix-coherent-dma-mask.patch x86-fpu-amd-clear-exceptions-in-amd-fxsave-workaround.patch +sched-fix-race-on-toggling-cfs_bandwidth_used.patch +sched-fix-cfs_bandwidth-misuse-of-hrtimer_expires_remaining.patch +sched-fix-hrtimer_cancel-rq-lock-deadlock.patch +sched-guarantee-new-group-entities-always-have-weight.patch
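
As a rough userspace illustration of the lock-bounce pattern used by sched-fix-hrtimer_cancel-rq-lock-deadlock.patch above: the sketch below is not kernel code and every name in it (worker, stop_worker_locked, worker_running, stop_requested) is invented for illustration. It only mimics the shape of the fix, where __start_cfs_bandwidth now requests cancellation via hrtimer_try_to_cancel() and drops/retakes cfs_b->lock with cpu_relax() instead of blocking in hrtimer_cancel() while holding the lock; the kernel's re-check of timer_active after reacquiring the lock is omitted here.

/*
 * Hypothetical userspace analogue (pthreads, not kernel APIs) of the
 * __start_cfs_bandwidth() change.
 *
 * A worker thread periodically takes `lock` (like the period timer taking
 * rq->lock).  A caller that already holds `lock` must stop the worker, but
 * blocking until the worker exits would deadlock, so it requests the stop
 * (the analogue of hrtimer_try_to_cancel()) and bounces the lock while it
 * waits, the way the patch bounces cfs_b->lock and calls cpu_relax().
 *
 * Build with: cc -pthread example.c
 */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool stop_requested = false;	/* cancel request           */
static atomic_bool worker_running = true;	/* analogue of timer_active */

static void *worker(void *arg)
{
	(void)arg;
	while (!atomic_load(&stop_requested)) {
		pthread_mutex_lock(&lock);	/* timer callback taking the lock */
		/* ... periodic work under the lock ... */
		pthread_mutex_unlock(&lock);
		usleep(1000);
	}
	atomic_store(&worker_running, false);
	return NULL;
}

/* Called with `lock` held: never block on the worker, poll and bounce. */
static void stop_worker_locked(void)
{
	atomic_store(&stop_requested, true);
	while (atomic_load(&worker_running)) {
		pthread_mutex_unlock(&lock);	/* let the worker finish its pass */
		sched_yield();			/* stand-in for cpu_relax()       */
		pthread_mutex_lock(&lock);
	}
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	usleep(10000);

	pthread_mutex_lock(&lock);
	stop_worker_locked();
	pthread_mutex_unlock(&lock);

	return pthread_join(t, NULL);
}

The point mirrored here is the one the patch makes: the context that holds the lock never waits synchronously for the concurrent callback to complete; it only requests cancellation and releases the lock on each poll so the callback can make progress and terminate on its own.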