git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.8.5/sched-fair-fix-min_vruntime-tracking.patch
4.9-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 4.8.5 / sched-fair-fix-min_vruntime-tracking.patch
1 From b60205c7c558330e4e2b5df498355ec959457358 Mon Sep 17 00:00:00 2001
2 From: Peter Zijlstra <peterz@infradead.org>
3 Date: Tue, 20 Sep 2016 21:58:12 +0200
4 Subject: sched/fair: Fix min_vruntime tracking
5
6 From: Peter Zijlstra <peterz@infradead.org>
7
8 commit b60205c7c558330e4e2b5df498355ec959457358 upstream.
9
10 While going through enqueue/dequeue to review the movement of
11 set_curr_task() I noticed that the (2nd) update_min_vruntime() call in
12 dequeue_entity() is suspect.
13
14 It turns out it's actually wrong, because it will consider
15 cfs_rq->curr, which could be the entry we just normalized. This mixes
16 different vruntime forms and leads to failures.
17
18 The purpose of the second update_min_vruntime() is to move
19 min_vruntime forward if the entity we just removed is the one that was
20 holding it back; _except_ for the DEQUEUE_SAVE case, because then we
21 know it's a temporary removal and it will come back.
22
23 However, since we do put_prev_task() _after_ dequeue(), cfs_rq->curr
24 will still be set (and per the above, can be transformed into a
25 different unit), so update_min_vruntime() should also consider
26 curr->on_rq. This also fixes another corner case where the enqueue
27 (which also does update_curr()->update_min_vruntime()) happens on the
28 rq->lock break in schedule(), between dequeue and put_prev_task.
29
30 Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
31 Cc: Linus Torvalds <torvalds@linux-foundation.org>
32 Cc: Mike Galbraith <efault@gmx.de>
33 Cc: Peter Zijlstra <peterz@infradead.org>
34 Cc: Thomas Gleixner <tglx@linutronix.de>
35 Cc: linux-kernel@vger.kernel.org
36 Fixes: 1e876231785d ("sched: Fix ->min_vruntime calculation in dequeue_entity()")
37 Signed-off-by: Ingo Molnar <mingo@kernel.org>
38 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
39
40 ---
41 kernel/sched/fair.c | 29 ++++++++++++++++++++++-------
42 1 file changed, 22 insertions(+), 7 deletions(-)
43
44 --- a/kernel/sched/fair.c
45 +++ b/kernel/sched/fair.c
46 @@ -456,17 +456,23 @@ static inline int entity_before(struct s
47
48 static void update_min_vruntime(struct cfs_rq *cfs_rq)
49 {
50 + struct sched_entity *curr = cfs_rq->curr;
51 +
52 u64 vruntime = cfs_rq->min_vruntime;
53
54 - if (cfs_rq->curr)
55 - vruntime = cfs_rq->curr->vruntime;
56 + if (curr) {
57 + if (curr->on_rq)
58 + vruntime = curr->vruntime;
59 + else
60 + curr = NULL;
61 + }
62
63 if (cfs_rq->rb_leftmost) {
64 struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
65 struct sched_entity,
66 run_node);
67
68 - if (!cfs_rq->curr)
69 + if (!curr)
70 vruntime = se->vruntime;
71 else
72 vruntime = min_vruntime(vruntime, se->vruntime);
73 @@ -3466,9 +3472,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
74 account_entity_dequeue(cfs_rq, se);
75
76 /*
77 - * Normalize the entity after updating the min_vruntime because the
78 - * update can refer to the ->curr item and we need to reflect this
79 - * movement in our normalized position.
80 + * Normalize after update_curr(); which will also have moved
81 + * min_vruntime if @se is the one holding it back. But before doing
82 + * update_min_vruntime() again, which will discount @se's position and
83 + * can move min_vruntime forward still more.
84 */
85 if (!(flags & DEQUEUE_SLEEP))
86 se->vruntime -= cfs_rq->min_vruntime;
87 @@ -3476,8 +3483,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
88 /* return excess runtime on last dequeue */
89 return_cfs_rq_runtime(cfs_rq);
90
91 - update_min_vruntime(cfs_rq);
92 update_cfs_shares(cfs_rq);
93 +
94 + /*
95 + * Now advance min_vruntime if @se was the entity holding it back,
96 + * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
97 + * put back on, and if we advance min_vruntime, we'll be placed back
98 + * further than we started -- ie. we'll be penalized.
99 + */
100 + if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
101 + update_min_vruntime(cfs_rq);
102 }
103
104 /*