]>
Commit | Line | Data |
---|---|---|
82b2f3ba SL |
1 | From 53403636e536d1fc660449a56f1604a4d2f6a708 Mon Sep 17 00:00:00 2001 |
2 | From: Phil Auld <pauld@redhat.com> | |
3 | Date: Tue, 23 Apr 2019 19:51:06 -0400 | |
4 | Subject: sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup | |
5 | ||
6 | [ Upstream commit 2e8e19226398db8265a8e675fcc0118b9e80c9e8 ] | |
7 | ||
8 | With an extremely short cfs_period_us setting on a parent task group with a large | |
9 | number of children, the for loop in sched_cfs_period_timer() can run until the | |
10 | watchdog fires. There is no guarantee that the call to hrtimer_forward_now() | |
11 | will ever return 0. The large number of children can make | |
12 | do_sched_cfs_period_timer() take longer than the period. | |
13 | ||
14 | NMI watchdog: Watchdog detected hard LOCKUP on cpu 24 | |
15 | RIP: 0010:tg_nop+0x0/0x10 | |
16 | <IRQ> | |
17 | walk_tg_tree_from+0x29/0xb0 | |
18 | unthrottle_cfs_rq+0xe0/0x1a0 | |
19 | distribute_cfs_runtime+0xd3/0xf0 | |
20 | sched_cfs_period_timer+0xcb/0x160 | |
21 | ? sched_cfs_slack_timer+0xd0/0xd0 | |
22 | __hrtimer_run_queues+0xfb/0x270 | |
23 | hrtimer_interrupt+0x122/0x270 | |
24 | smp_apic_timer_interrupt+0x6a/0x140 | |
25 | apic_timer_interrupt+0xf/0x20 | |
26 | </IRQ> | |
27 | ||
28 | To prevent this we add protection to the loop that detects when the loop has run | |
29 | too many times and scales the period and quota up, proportionally, so that the timer | |
30 | can complete before the next period expires. This preserves the relative runtime | |
31 | quota while preventing the hard lockup. | |
32 | ||
33 | A warning is issued reporting this state and the new values. | |
34 | ||
35 | Signed-off-by: Phil Auld <pauld@redhat.com> | |
36 | Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> | |
37 | Cc: <stable@vger.kernel.org> | |
38 | Cc: Anton Blanchard <anton@ozlabs.org> | |
39 | Cc: Ben Segall <bsegall@google.com> | |
40 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
41 | Cc: Peter Zijlstra <peterz@infradead.org> | |
42 | Cc: Thomas Gleixner <tglx@linutronix.de> | |
43 | Link: https://lkml.kernel.org/r/20190319130005.25492-1-pauld@redhat.com | |
44 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
45 | Signed-off-by: Sasha Levin <sashal@kernel.org> | |
46 | --- | |
47 | kernel/sched/fair.c | 25 +++++++++++++++++++++++++ | |
48 | 1 file changed, 25 insertions(+) | |
49 | ||
50 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | |
51 | index 9829ede00498..a5d163903835 100644 | |
52 | --- a/kernel/sched/fair.c | |
53 | +++ b/kernel/sched/fair.c | |
54 | @@ -4672,12 +4672,15 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) | |
55 | return HRTIMER_NORESTART; | |
56 | } | |
57 | ||
58 | +extern const u64 max_cfs_quota_period; | |
59 | + | |
60 | static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) | |
61 | { | |
62 | struct cfs_bandwidth *cfs_b = | |
63 | container_of(timer, struct cfs_bandwidth, period_timer); | |
64 | int overrun; | |
65 | int idle = 0; | |
66 | + int count = 0; | |
67 | ||
68 | raw_spin_lock(&cfs_b->lock); | |
69 | for (;;) { | |
70 | @@ -4685,6 +4688,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) | |
71 | if (!overrun) | |
72 | break; | |
73 | ||
74 | + if (++count > 3) { | |
75 | + u64 new, old = ktime_to_ns(cfs_b->period); | |
76 | + | |
77 | + new = (old * 147) / 128; /* ~115% */ | |
78 | + new = min(new, max_cfs_quota_period); | |
79 | + | |
80 | + cfs_b->period = ns_to_ktime(new); | |
81 | + | |
82 | + /* since max is 1s, this is limited to 1e9^2, which fits in u64 */ | |
83 | + cfs_b->quota *= new; | |
84 | + cfs_b->quota = div64_u64(cfs_b->quota, old); | |
85 | + | |
86 | + pr_warn_ratelimited( | |
87 | + "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n", | |
88 | + smp_processor_id(), | |
89 | + div_u64(new, NSEC_PER_USEC), | |
90 | + div_u64(cfs_b->quota, NSEC_PER_USEC)); | |
91 | + | |
92 | + /* reset count so we don't come right back in here */ | |
93 | + count = 0; | |
94 | + } | |
95 | + | |
96 | idle = do_sched_cfs_period_timer(cfs_b, overrun); | |
97 | } | |
98 | if (idle) | |
99 | -- | |
100 | 2.19.1 | |
101 |