patches for 3.18
author Sasha Levin <sashal@kernel.org>
Wed, 24 Apr 2019 01:27:06 +0000 (21:27 -0400)
committer Sasha Levin <sashal@kernel.org>
Wed, 24 Apr 2019 01:27:06 +0000 (21:27 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-3.18/sched-fair-limit-sched_cfs_period_timer-loop-to-avoi.patch [new file with mode: 0644]
queue-3.18/series

diff --git a/queue-3.18/sched-fair-limit-sched_cfs_period_timer-loop-to-avoi.patch b/queue-3.18/sched-fair-limit-sched_cfs_period_timer-loop-to-avoi.patch
new file mode 100644
index 0000000..85f6309
--- /dev/null
@@ -0,0 +1,102 @@
+From 23c18a212c37d53a8e369a8cd819d32748b37a08 Mon Sep 17 00:00:00 2001
+From: Phil Auld <pauld@redhat.com>
+Date: Tue, 23 Apr 2019 19:51:06 -0400
+Subject: sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup
+
+[ Upstream commit 2e8e19226398db8265a8e675fcc0118b9e80c9e8 ]
+
+With an extremely short cfs_period_us setting on a parent task group with a
+large number of children, the for loop in sched_cfs_period_timer() can run
+until the watchdog fires. There is no guarantee that the call to
+hrtimer_forward_now() will ever return 0. The large number of children can
+make do_sched_cfs_period_timer() take longer than the period.
+
+ NMI watchdog: Watchdog detected hard LOCKUP on cpu 24
+ RIP: 0010:tg_nop+0x0/0x10
+  <IRQ>
+  walk_tg_tree_from+0x29/0xb0
+  unthrottle_cfs_rq+0xe0/0x1a0
+  distribute_cfs_runtime+0xd3/0xf0
+  sched_cfs_period_timer+0xcb/0x160
+  ? sched_cfs_slack_timer+0xd0/0xd0
+  __hrtimer_run_queues+0xfb/0x270
+  hrtimer_interrupt+0x122/0x270
+  smp_apic_timer_interrupt+0x6a/0x140
+  apic_timer_interrupt+0xf/0x20
+  </IRQ>
+
+To prevent this we add protection to the loop that detects when the loop has
+run too many times and scales the period and quota up, proportionally, so that
+the timer can complete before the next period expires. This preserves the
+relative runtime quota while preventing the hard lockup.
+
+A warning is issued reporting this state and the new values; a worked example
+of the scaling arithmetic follows the patch.
+
+Signed-off-by: Phil Auld <pauld@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: <stable@vger.kernel.org>
+Cc: Anton Blanchard <anton@ozlabs.org>
+Cc: Ben Segall <bsegall@google.com>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lkml.kernel.org/r/20190319130005.25492-1-pauld@redhat.com
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 25 +++++++++++++++++++++++++
+ 1 file changed, 25 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 4aa559b09e9c..9020fbe8d785 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3771,6 +3771,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
+       return HRTIMER_NORESTART;
+ }
+ 
++extern const u64 max_cfs_quota_period;
++
+ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+ {
+       struct cfs_bandwidth *cfs_b =
+@@ -3778,6 +3780,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+       ktime_t now;
+       int overrun;
+       int idle = 0;
++      int count = 0;
+ 
+       raw_spin_lock(&cfs_b->lock);
+       for (;;) {
+@@ -3787,6 +3790,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+               if (!overrun)
+                       break;
+ 
++              if (++count > 3) {
++                      u64 new, old = ktime_to_ns(cfs_b->period);
++
++                      new = (old * 147) / 128; /* ~115% */
++                      new = min(new, max_cfs_quota_period);
++
++                      cfs_b->period = ns_to_ktime(new);
++
++                      /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
++                      cfs_b->quota *= new;
++                      cfs_b->quota = div64_u64(cfs_b->quota, old);
++
++                      pr_warn_ratelimited(
++        "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
++                              smp_processor_id(),
++                              div_u64(new, NSEC_PER_USEC),
++                              div_u64(cfs_b->quota, NSEC_PER_USEC));
++
++                      /* reset count so we don't come right back in here */
++                      count = 0;
++              }
++
+               idle = do_sched_cfs_period_timer(cfs_b, overrun);
+       }
+       raw_spin_unlock(&cfs_b->lock);
+-- 
+2.19.1
+
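The scaling above preserves the quota-to-period ratio: period and quota are
both multiplied by 147/128 (about 115%), so a group that was allowed 50% of a
CPU is still allowed 50%, just accounted over a longer period. Below is a
minimal userspace sketch of that arithmetic, assuming a 1s cap like the
kernel's max_cfs_quota_period; plain 64-bit division stands in for the
kernel's div64_u64()/div_u64() helpers, and the function and variable names
are illustrative, not taken from the patch.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_USEC        1000ULL
#define MAX_CFS_QUOTA_PERIOD 1000000000ULL  /* assumed 1s cap, as in the kernel */

/* Grow the period by ~115% (147/128) and scale the quota proportionally,
 * mirroring the count > 3 branch in sched_cfs_period_timer() above. */
static void scale_cfs_bandwidth(uint64_t *period_ns, uint64_t *quota_ns)
{
        uint64_t old = *period_ns;
        uint64_t new = old * 147 / 128;          /* ~115% of the old period */

        if (new > MAX_CFS_QUOTA_PERIOD)
                new = MAX_CFS_QUOTA_PERIOD;      /* clamp to the 1s maximum */

        /* since the max is 1s, quota * new stays below 1e9^2 and fits in u64 */
        *quota_ns = *quota_ns * new / old;
        *period_ns = new;
}

int main(void)
{
        /* a pathologically short 100us period with a 50us quota (50% of a CPU) */
        uint64_t period = 100 * NSEC_PER_USEC;
        uint64_t quota  = 50 * NSEC_PER_USEC;

        scale_cfs_bandwidth(&period, &quota);
        printf("new cfs_period_us %llu, cfs_quota_us %llu\n",
               (unsigned long long)(period / NSEC_PER_USEC),
               (unsigned long long)(quota / NSEC_PER_USEC));
        /* prints: new cfs_period_us 114, cfs_quota_us 57 -- the 1:2 ratio holds */
        return 0;
}

Each pass through that branch grows the period geometrically, so after a few
timer firings the period becomes long enough for do_sched_cfs_period_timer()
to finish inside it (or it pins at the 1s cap), which stops the loop before
the NMI watchdog fires.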
diff --git a/queue-3.18/series b/queue-3.18/series
index b7d56137b21989d1fa3d38f855edcb1358efd483..f21f6435ea1c3d9156dd625e2c6366d116117b0b 100644
--- a/queue-3.18/series
+++ b/queue-3.18/series
@@ -101,3 +101,4 @@ alsa-core-fix-card-races-between-register-and-disconnect.patch
 x86-kprobes-verify-stack-frame-on-kretprobe.patch
 kprobes-fix-error-check-when-reusing-optimized-probes.patch
 mac80211-do-not-call-driver-wake_tx_queue-op-during-reconfig.patch
+sched-fair-limit-sched_cfs_period_timer-loop-to-avoi.patch