--- /dev/null
+From db06e78cc13d70f10877e0557becc88ab3ad2be8 Mon Sep 17 00:00:00 2001
+From: Ben Segall <bsegall@google.com>
+Date: Wed, 16 Oct 2013 11:16:17 -0700
+Subject: sched: Fix cfs_bandwidth misuse of hrtimer_expires_remaining
+
+From: Ben Segall <bsegall@google.com>
+
+commit db06e78cc13d70f10877e0557becc88ab3ad2be8 upstream.
+
+hrtimer_expires_remaining does not take internal hrtimer locks and thus
+must be guarded against concurrent __hrtimer_start_range_ns (but
+returning HRTIMER_RESTART is safe). Use cfs_b->lock to make it safe.
+
+Signed-off-by: Ben Segall <bsegall@google.com>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Cc: pjt@google.com
+Link: http://lkml.kernel.org/r/20131016181617.22647.73829.stgit@sword-of-the-dawn.mtv.corp.google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Chris J Arges <chris.j.arges@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/fair.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -2519,7 +2519,13 @@ static const u64 min_bandwidth_expiratio
+ /* how long we wait to gather additional slack before distributing */
+ static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC;
+
+-/* are we near the end of the current quota period? */
++/*
++ * Are we near the end of the current quota period?
++ *
++ * Requires cfs_b->lock for hrtimer_expires_remaining to be safe against the
++ * hrtimer base being cleared by __hrtimer_start_range_ns. In the case of
++ * migrate_hrtimers, base is never cleared, so we are fine.
++ */
+ static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire)
+ {
+ struct hrtimer *refresh_timer = &cfs_b->period_timer;
+@@ -2595,10 +2601,12 @@ static void do_sched_cfs_slack_timer(str
+ u64 expires;
+
+ /* confirm we're still not at a refresh boundary */
+- if (runtime_refresh_within(cfs_b, min_bandwidth_expiration))
++ raw_spin_lock(&cfs_b->lock);
++ if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
++ raw_spin_unlock(&cfs_b->lock);
+ return;
++ }
+
+- raw_spin_lock(&cfs_b->lock);
+ if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) {
+ runtime = cfs_b->runtime;
+ cfs_b->runtime = 0;
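The do_sched_cfs_slack_timer() change above reduces to one locking rule: the
early-return guard must run under the same lock that serializes re-arming of
the timer, so the lock is taken before the check and released on both exit
paths. Below is a minimal user-space sketch of that shape, assuming pthread
primitives and made-up field names (expires_ns, runtime_ns) rather than the
kernel's own types:

#include <pthread.h>
#include <stdbool.h>

struct bandwidth {                      /* stand-in for struct cfs_bandwidth */
	pthread_mutex_t lock;           /* plays the role of cfs_b->lock */
	long long expires_ns;           /* deadline rewritten by the re-arm path */
	long long runtime_ns;
};

/* Like runtime_refresh_within(): only meaningful while bw->lock is held. */
static bool refresh_within(struct bandwidth *bw, long long min_expire_ns)
{
	return bw->expires_ns < min_expire_ns;
}

static void slack_timer(struct bandwidth *bw, long long min_expire_ns)
{
	pthread_mutex_lock(&bw->lock);           /* lock taken before the guard */
	if (refresh_within(bw, min_expire_ns)) {
		pthread_mutex_unlock(&bw->lock); /* early return must also unlock */
		return;
	}
	/* ... hand out the remaining runtime, still under the lock ... */
	bw->runtime_ns = 0;
	pthread_mutex_unlock(&bw->lock);
}

int main(void)
{
	struct bandwidth bw = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.expires_ns = 7000000LL,
		.runtime_ns = 1000000LL,
	};
	slack_timer(&bw, 2000000LL);    /* not near a refresh: runtime is handed out */
	return (int)bw.runtime_ns;
}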
--- /dev/null
+From 927b54fccbf04207ec92f669dce6806848cbec7d Mon Sep 17 00:00:00 2001
+From: Ben Segall <bsegall@google.com>
+Date: Wed, 16 Oct 2013 11:16:22 -0700
+Subject: sched: Fix hrtimer_cancel()/rq->lock deadlock
+
+From: Ben Segall <bsegall@google.com>
+
+commit 927b54fccbf04207ec92f669dce6806848cbec7d upstream.
+
+__start_cfs_bandwidth calls hrtimer_cancel while holding rq->lock,
+waiting for the hrtimer to finish. However, if sched_cfs_period_timer
+runs for another loop iteration, the hrtimer can attempt to take
+rq->lock, resulting in deadlock.
+
+Fix this by ensuring that cfs_b->timer_active is cleared only if the
+_latest_ call to do_sched_cfs_period_timer is returning as idle. Then
+__start_cfs_bandwidth can just call hrtimer_try_to_cancel and wait for
+that to succeed or timer_active == 1.
+
+Signed-off-by: Ben Segall <bsegall@google.com>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Cc: pjt@google.com
+Link: http://lkml.kernel.org/r/20131016181622.22647.16643.stgit@sword-of-the-dawn.mtv.corp.google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Chris J Arges <chris.j.arges@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/fair.c | 15 +++++++++++----
+ 1 file changed, 11 insertions(+), 4 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -2459,6 +2459,13 @@ static int do_sched_cfs_period_timer(str
+ if (idle)
+ goto out_unlock;
+
++ /*
++ * if we have relooped after returning idle once, we need to update our
++ * status as actually running, so that other cpus doing
++ * __start_cfs_bandwidth will stop trying to cancel us.
++ */
++ cfs_b->timer_active = 1;
++
+ __refill_cfs_bandwidth_runtime(cfs_b);
+
+ if (!throttled) {
+@@ -2727,11 +2734,11 @@ void __start_cfs_bandwidth(struct cfs_ba
+ * (timer_active==0 becomes visible before the hrtimer call-back
+ * terminates). In either case we ensure that it's re-programmed
+ */
+- while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
++ while (unlikely(hrtimer_active(&cfs_b->period_timer)) &&
++ hrtimer_try_to_cancel(&cfs_b->period_timer) < 0) {
++ /* bounce the lock to allow do_sched_cfs_period_timer to run */
+ raw_spin_unlock(&cfs_b->lock);
+- /* ensure cfs_b->lock is available while we wait */
+- hrtimer_cancel(&cfs_b->period_timer);
+-
++ cpu_relax();
+ raw_spin_lock(&cfs_b->lock);
+ /* if someone else restarted the timer then we're done */
+ if (cfs_b->timer_active)
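The loop above is the whole fix: never block in hrtimer_cancel() while holding
a lock the timer callback itself needs; instead try to cancel, and if the
callback is mid-flight, drop the lock so it can finish, then retry. A rough
user-space sketch of that pattern, with pthread stand-ins for the locking and
a hypothetical try_to_cancel() helper in place of hrtimer_try_to_cancel():

#include <pthread.h>
#include <sched.h>      /* sched_yield() stands in for cpu_relax() */
#include <stdbool.h>

struct period_timer {
	pthread_mutex_t lock;   /* plays the role of cfs_b->lock */
	bool timer_active;      /* like cfs_b->timer_active */
	bool callback_running;  /* "the timer callback is executing right now" */
};

/* Fails, like hrtimer_try_to_cancel() < 0, while the callback is running. */
static bool try_to_cancel(struct period_timer *t)
{
	return !t->callback_running;
}

static void restart_period_timer(struct period_timer *t)
{
	pthread_mutex_lock(&t->lock);
	while (!try_to_cancel(t)) {
		/* bounce the lock so the callback can take it and finish */
		pthread_mutex_unlock(&t->lock);
		sched_yield();
		pthread_mutex_lock(&t->lock);
		if (t->timer_active)    /* the callback re-armed itself: done */
			goto out;
	}
	t->timer_active = true;
	/* ... (re)arm the one-shot period timer here ... */
out:
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	struct period_timer t = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.timer_active = false,
		.callback_running = false,
	};
	restart_period_timer(&t);
	return t.timer_active ? 0 : 1;
}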
--- /dev/null
+From 1ee14e6c8cddeeb8a490d7b54cd9016e4bb900b4 Mon Sep 17 00:00:00 2001
+From: Ben Segall <bsegall@google.com>
+Date: Wed, 16 Oct 2013 11:16:12 -0700
+Subject: sched: Fix race on toggling cfs_bandwidth_used
+
+From: Ben Segall <bsegall@google.com>
+
+commit 1ee14e6c8cddeeb8a490d7b54cd9016e4bb900b4 upstream.
+
+When we transition cfs_bandwidth_used to false, any currently
+throttled groups will incorrectly return false from cfs_rq_throttled.
+While tg_set_cfs_bandwidth will unthrottle them eventually, currently
+running code (including at least dequeue_task_fair and
+distribute_cfs_runtime) will cause errors.
+
+Fix this by turning off cfs_bandwidth_used only after unthrottling all
+cfs_rqs.
+
+Tested: toggle bandwidth back and forth on a loaded cgroup. Caused
+crashes in minutes without the patch, hasn't crashed with it.
+
+Signed-off-by: Ben Segall <bsegall@google.com>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Cc: pjt@google.com
+Link: http://lkml.kernel.org/r/20131016181611.22647.80365.stgit@sword-of-the-dawn.mtv.corp.google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Chris J Arges <chris.j.arges@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/core.c | 9 ++++++++-
+ kernel/sched/fair.c | 16 +++++++++-------
+ kernel/sched/sched.h | 3 ++-
+ 3 files changed, 19 insertions(+), 9 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -7277,7 +7277,12 @@ static int tg_set_cfs_bandwidth(struct t
+
+ runtime_enabled = quota != RUNTIME_INF;
+ runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
+- account_cfs_bandwidth_used(runtime_enabled, runtime_was_enabled);
++ /*
++ * If we need to toggle cfs_bandwidth_used, off->on must occur
++ * before making related changes, and on->off must occur afterwards
++ */
++ if (runtime_enabled && !runtime_was_enabled)
++ cfs_bandwidth_usage_inc();
+ raw_spin_lock_irq(&cfs_b->lock);
+ cfs_b->period = ns_to_ktime(period);
+ cfs_b->quota = quota;
+@@ -7303,6 +7308,8 @@ static int tg_set_cfs_bandwidth(struct t
+ unthrottle_cfs_rq(cfs_rq);
+ raw_spin_unlock_irq(&rq->lock);
+ }
++ if (runtime_was_enabled && !runtime_enabled)
++ cfs_bandwidth_usage_dec();
+ out_unlock:
+ mutex_unlock(&cfs_constraints_mutex);
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -2077,13 +2077,14 @@ static inline bool cfs_bandwidth_used(vo
+ return static_key_false(&__cfs_bandwidth_used);
+ }
+
+-void account_cfs_bandwidth_used(int enabled, int was_enabled)
++void cfs_bandwidth_usage_inc(void)
+ {
+- /* only need to count groups transitioning between enabled/!enabled */
+- if (enabled && !was_enabled)
+- static_key_slow_inc(&__cfs_bandwidth_used);
+- else if (!enabled && was_enabled)
+- static_key_slow_dec(&__cfs_bandwidth_used);
++ static_key_slow_inc(&__cfs_bandwidth_used);
++}
++
++void cfs_bandwidth_usage_dec(void)
++{
++ static_key_slow_dec(&__cfs_bandwidth_used);
+ }
+ #else /* HAVE_JUMP_LABEL */
+ static bool cfs_bandwidth_used(void)
+@@ -2091,7 +2092,8 @@ static bool cfs_bandwidth_used(void)
+ return true;
+ }
+
+-void account_cfs_bandwidth_used(int enabled, int was_enabled) {}
++void cfs_bandwidth_usage_inc(void) {}
++void cfs_bandwidth_usage_dec(void) {}
+ #endif /* HAVE_JUMP_LABEL */
+
+ /*
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1305,7 +1305,8 @@ extern void print_rt_stats(struct seq_fi
+ extern void init_cfs_rq(struct cfs_rq *cfs_rq);
+ extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
+
+-extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
++extern void cfs_bandwidth_usage_inc(void);
++extern void cfs_bandwidth_usage_dec(void);
+
+ #ifdef CONFIG_NO_HZ_COMMON
+ enum rq_nohz_flag_bits {
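The ordering the patch above enforces is: raise the usage key before the
bandwidth state it guards comes into existence, and lower it only after every
cfs_rq has been unthrottled, so readers never see throttled state with the key
off. A simplified sketch of that ordering, with the jump label reduced to a
plain C11 atomic flag and hypothetical helper names modelled on the patch:

#include <stdatomic.h>
#include <stdbool.h>

/* Simplified stand-in for the __cfs_bandwidth_used static key. */
static atomic_bool bandwidth_used;

static void bandwidth_usage_inc(void) { atomic_store(&bandwidth_used, true); }
static void bandwidth_usage_dec(void) { atomic_store(&bandwidth_used, false); }

/* Readers such as cfs_rq_throttled() would key off this check. */
static bool bandwidth_in_use(void) { return atomic_load(&bandwidth_used); }

static void set_bandwidth(bool was_enabled, bool enabled)
{
	/*
	 * off -> on: flip the flag first, so the quota/throttle state about
	 * to appear is never visible while the flag is still clear.
	 */
	if (enabled && !was_enabled)
		bandwidth_usage_inc();

	/* ... install the new quota and unthrottle every runqueue ... */

	/*
	 * on -> off: flip the flag last, only after all throttled state is
	 * gone, so no reader sees a throttled cfs_rq with the flag off.
	 */
	if (was_enabled && !enabled)
		bandwidth_usage_dec();
}

int main(void)
{
	set_bandwidth(false, true);     /* enable: flag goes up before the state */
	set_bandwidth(true, false);     /* disable: flag goes down after cleanup */
	return bandwidth_in_use() ? 1 : 0;
}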
--- /dev/null
+From 0ac9b1c21874d2490331233b3242085f8151e166 Mon Sep 17 00:00:00 2001
+From: Paul Turner <pjt@google.com>
+Date: Wed, 16 Oct 2013 11:16:27 -0700
+Subject: sched: Guarantee new group-entities always have weight
+
+From: Paul Turner <pjt@google.com>
+
+commit 0ac9b1c21874d2490331233b3242085f8151e166 upstream.
+
+Currently, group entity load-weights are initialized to zero. This
+admits some races with respect to the first time they are re-weighted in
+early use. ( Let g[x] denote the se for "g" on cpu "x". )
+
+Suppose that we have root->a and that a enters a throttled state,
+immediately followed by a[0]->t1 (the only task running on cpu[0])
+blocking:
+
+ put_prev_task(group_cfs_rq(a[0]), t1)
+ put_prev_entity(..., t1)
+ check_cfs_rq_runtime(group_cfs_rq(a[0]))
+ throttle_cfs_rq(group_cfs_rq(a[0]))
+
+Then, before unthrottling occurs, let a[0]->b[0]->t2 wake for the first
+time:
+
+ enqueue_task_fair(rq[0], t2)
+ enqueue_entity(group_cfs_rq(b[0]), t2)
+ enqueue_entity_load_avg(group_cfs_rq(b[0]), t2)
+   account_entity_enqueue(group_cfs_rq(b[0]), t2)
+ update_cfs_shares(group_cfs_rq(b[0]))
+ < skipped because b is part of a throttled hierarchy >
+ enqueue_entity(group_cfs_rq(a[0]), b[0])
+ ...
+
+We now have b[0] enqueued, yet group_cfs_rq(a[0])->load.weight == 0
+which violates invariants in several code-paths. Eliminate the
+possibility of this by initializing group entity weight.
+
+Signed-off-by: Paul Turner <pjt@google.com>
+Signed-off-by: Peter Zijlstra <peterz@infradead.org>
+Link: http://lkml.kernel.org/r/20131016181627.22647.47543.stgit@sword-of-the-dawn.mtv.corp.google.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Chris J Arges <chris.j.arges@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/fair.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -6240,7 +6240,8 @@ void init_tg_cfs_entry(struct task_group
+ se->cfs_rq = parent->my_q;
+
+ se->my_q = cfs_rq;
+- update_load_set(&se->load, 0);
++ /* guarantee group entities always have weight */
++ update_load_set(&se->load, NICE_0_LOAD);
+ se->parent = parent;
+ }
+
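The one-line change above closes the zero-weight window by giving every new
group entity a default weight at creation. A trivial, self-contained
illustration of the restored invariant, using a made-up struct and 1024 as a
stand-in for NICE_0_LOAD (the real value depends on the kernel's
load-resolution configuration):

#include <assert.h>

#define DEFAULT_WEIGHT 1024UL   /* stand-in for NICE_0_LOAD */

struct group_entity {           /* illustrative, not the kernel's sched_entity */
	unsigned long weight;
};

static void init_group_entity(struct group_entity *se)
{
	/*
	 * Was effectively "weight = 0", which left a window in which the
	 * entity could be enqueued before its first reweight, violating the
	 * load.weight > 0 assumption in the enqueue paths.
	 */
	se->weight = DEFAULT_WEIGHT;
}

int main(void)
{
	struct group_entity se;
	init_group_entity(&se);
	assert(se.weight > 0);  /* invariant the enqueue code relies on */
	return 0;
}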
netfilter-fix-wrong-byte-order-in-nf_ct_seqadj_set-internal-information.patch
netfilter-nf_nat-fix-access-to-uninitialized-buffer-in-irc-nat-helper.patch
x86-fpu-amd-clear-exceptions-in-amd-fxsave-workaround.patch
+sched-fix-race-on-toggling-cfs_bandwidth_used.patch
+sched-fix-cfs_bandwidth-misuse-of-hrtimer_expires_remaining.patch
+sched-fix-hrtimer_cancel-rq-lock-deadlock.patch
+sched-guarantee-new-group-entities-always-have-weight.patch