From: Greg Kroah-Hartman
Date: Tue, 3 Mar 2020 15:43:14 +0000 (+0100)
Subject: 4.19-stable patches
X-Git-Tag: v4.19.108~25
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e495e824ea2e37c780edbb4ef0ed3883e53a0fc1;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
        sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch
        sched-fair-optimize-update_blocked_averages.patch
---

diff --git a/queue-4.19/sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch b/queue-4.19/sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch
new file mode 100644
index 00000000000..3d4ef96cf5b
--- /dev/null
+++ b/queue-4.19/sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch
@@ -0,0 +1,127 @@
+From 039ae8bcf7a5f4476f4487e6bf816885fb3fb617 Mon Sep 17 00:00:00 2001
+From: Vincent Guittot
+Date: Wed, 6 Feb 2019 17:14:22 +0100
+Subject: sched/fair: Fix O(nr_cgroups) in the load balancing path
+
+From: Vincent Guittot
+
+commit 039ae8bcf7a5f4476f4487e6bf816885fb3fb617 upstream.
+
+This re-applies the commit reverted here:
+
+  commit c40f7d74c741 ("sched/fair: Fix infinite loop in update_blocked_averages() by reverting a9e7f6544b9c")
+
+I.e. now that cfs_rq can be safely removed/added in the list, we can re-apply:
+
+  commit a9e7f6544b9c ("sched/fair: Fix O(nr_cgroups) in load balance path")
+
+Signed-off-by: Vincent Guittot
+Signed-off-by: Peter Zijlstra (Intel)
+Cc: Linus Torvalds
+Cc: Peter Zijlstra
+Cc: Thomas Gleixner
+Cc: sargun@sargun.me
+Cc: tj@kernel.org
+Cc: xiexiuqi@huawei.com
+Cc: xiezhipeng1@huawei.com
+Link: https://lkml.kernel.org/r/1549469662-13614-3-git-send-email-vincent.guittot@linaro.org
+Signed-off-by: Ingo Molnar
+Cc: Vishnu Rangayyan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/sched/fair.c | 43 ++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 34 insertions(+), 9 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -375,9 +375,10 @@ static inline void assert_list_leaf_cfs_
+         SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
+ }
+ 
+-/* Iterate through all cfs_rq's on a runqueue in bottom-up order */
+-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
+-        list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
++/* Iterate thr' all leaf cfs_rq's on a runqueue */
++#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \
++        list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list, \
++                                 leaf_cfs_rq_list)
+ 
+ /* Do the two (enqueued) entities belong to the same group ? */
+ static inline struct cfs_rq *
+@@ -474,8 +475,8 @@ static inline void assert_list_leaf_cfs_
+ {
+ }
+ 
+-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
+-        for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
++#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \
++        for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
+ 
+ static inline struct sched_entity *parent_entity(struct sched_entity *se)
+ {
+@@ -7461,10 +7462,27 @@ static inline bool others_have_blocked(s
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 
++static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
++{
++        if (cfs_rq->load.weight)
++                return false;
++
++        if (cfs_rq->avg.load_sum)
++                return false;
++
++        if (cfs_rq->avg.util_sum)
++                return false;
++
++        if (cfs_rq->avg.runnable_load_sum)
++                return false;
++
++        return true;
++}
++
+ static void update_blocked_averages(int cpu)
+ {
+         struct rq *rq = cpu_rq(cpu);
+-        struct cfs_rq *cfs_rq;
++        struct cfs_rq *cfs_rq, *pos;
+         const struct sched_class *curr_class;
+         struct rq_flags rf;
+         bool done = true;
+@@ -7476,7 +7494,7 @@ static void update_blocked_averages(int
+          * Iterates the task_group tree in a bottom up fashion, see
+          * list_add_leaf_cfs_rq() for details.
+          */
+-        for_each_leaf_cfs_rq(rq, cfs_rq) {
++        for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
+                 struct sched_entity *se;
+ 
+                 if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+@@ -7487,6 +7505,13 @@ static void update_blocked_averages(int
+                 if (se && !skip_blocked_update(se))
+                         update_load_avg(cfs_rq_of(se), se, 0);
+ 
++                /*
++                 * There can be a lot of idle CPU cgroups. Don't let fully
++                 * decayed cfs_rqs linger on the list.
++                 */
++                if (cfs_rq_is_decayed(cfs_rq))
++                        list_del_leaf_cfs_rq(cfs_rq);
++
+                 /* Don't need periodic decay once load/util_avg are null */
+                 if (cfs_rq_has_blocked(cfs_rq))
+                         done = false;
+@@ -10272,10 +10297,10 @@ const struct sched_class fair_sched_clas
+ #ifdef CONFIG_SCHED_DEBUG
+ void print_cfs_stats(struct seq_file *m, int cpu)
+ {
+-        struct cfs_rq *cfs_rq;
++        struct cfs_rq *cfs_rq, *pos;
+ 
+         rcu_read_lock();
+-        for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
++        for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
+                 print_cfs_rq(m, cpu, cfs_rq);
+         rcu_read_unlock();
+ }
diff --git a/queue-4.19/sched-fair-optimize-update_blocked_averages.patch b/queue-4.19/sched-fair-optimize-update_blocked_averages.patch
new file mode 100644
index 00000000000..e44f618721e
--- /dev/null
+++ b/queue-4.19/sched-fair-optimize-update_blocked_averages.patch
@@ -0,0 +1,111 @@
+From 31bc6aeaab1d1de8959b67edbed5c7a4b3cdbe7c Mon Sep 17 00:00:00 2001
+From: Vincent Guittot
+Date: Wed, 6 Feb 2019 17:14:21 +0100
+Subject: sched/fair: Optimize update_blocked_averages()
+
+From: Vincent Guittot
+
+commit 31bc6aeaab1d1de8959b67edbed5c7a4b3cdbe7c upstream.
+
+Removing a cfs_rq from rq->leaf_cfs_rq_list can break the parent/child
+ordering of the list when it will be added back. In order to remove an
+empty and fully decayed cfs_rq, we must remove its children too, so they
+will be added back in the right order next time.
+
+With a normal decay of PELT, a parent will be empty and fully decayed
+if all children are empty and fully decayed too. In such a case, we just
+have to ensure that the whole branch will be added when a new task is
+enqueued. This is default behavior since :
+
+  commit f6783319737f ("sched/fair: Fix insertion in rq->leaf_cfs_rq_list")
+
+In case of throttling, the PELT of throttled cfs_rq will not be updated
+whereas the parent will. This breaks the assumption made above unless we
+remove the children of a cfs_rq that is throttled. Then, they will be
+added back when unthrottled and a sched_entity will be enqueued.
+
+As throttled cfs_rq are now removed from the list, we can remove the
+associated test in update_blocked_averages().
+
+Signed-off-by: Vincent Guittot
+Signed-off-by: Peter Zijlstra (Intel)
+Cc: Linus Torvalds
+Cc: Peter Zijlstra
+Cc: Thomas Gleixner
+Cc: sargun@sargun.me
+Cc: tj@kernel.org
+Cc: xiexiuqi@huawei.com
+Cc: xiezhipeng1@huawei.com
+Link: https://lkml.kernel.org/r/1549469662-13614-2-git-send-email-vincent.guittot@linaro.org
+Signed-off-by: Ingo Molnar
+Cc: Vishnu Rangayyan
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/sched/fair.c | 26 +++++++++++++++++++++-----
+ 1 file changed, 21 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -353,6 +353,18 @@ static inline bool list_add_leaf_cfs_rq(
+ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+ {
+         if (cfs_rq->on_list) {
++                struct rq *rq = rq_of(cfs_rq);
++
++                /*
++                 * With cfs_rq being unthrottled/throttled during an enqueue,
++                 * it can happen the tmp_alone_branch points the a leaf that
++                 * we finally want to del. In this case, tmp_alone_branch moves
++                 * to the prev element but it will point to rq->leaf_cfs_rq_list
++                 * at the end of the enqueue.
++                 */
++                if (rq->tmp_alone_branch == &cfs_rq->leaf_cfs_rq_list)
++                        rq->tmp_alone_branch = cfs_rq->leaf_cfs_rq_list.prev;
++
+                 list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+                 cfs_rq->on_list = 0;
+         }
+@@ -4441,6 +4453,10 @@ static int tg_unthrottle_up(struct task_
+                 /* adjust cfs_rq_clock_task() */
+                 cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
+                                         cfs_rq->throttled_clock_task;
++
++                /* Add cfs_rq with already running entity in the list */
++                if (cfs_rq->nr_running >= 1)
++                        list_add_leaf_cfs_rq(cfs_rq);
+         }
+ 
+         return 0;
+@@ -4452,8 +4468,10 @@ static int tg_throttle_down(struct task_
+         struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+ 
+         /* group is entering throttled state, stop time */
+-        if (!cfs_rq->throttle_count)
++        if (!cfs_rq->throttle_count) {
+                 cfs_rq->throttled_clock_task = rq_clock_task(rq);
++                list_del_leaf_cfs_rq(cfs_rq);
++        }
+         cfs_rq->throttle_count++;
+ 
+         return 0;
+@@ -4556,6 +4574,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cf
+                         break;
+         }
+ 
++        assert_list_leaf_cfs_rq(rq);
++
+         if (!se)
+                 add_nr_running(rq, task_delta);
+ 
+@@ -7459,10 +7479,6 @@ static void update_blocked_averages(int
+         for_each_leaf_cfs_rq(rq, cfs_rq) {
+                 struct sched_entity *se;
+ 
+-                /* throttled entities do not contribute to load */
+-                if (throttled_hierarchy(cfs_rq))
+-                        continue;
+-
+                 if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+                         update_tg_load_avg(cfs_rq, 0);
+ 
diff --git a/queue-4.19/series b/queue-4.19/series
index 572452a55af..93671ab53cd 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -69,3 +69,5 @@ mwifiex-drop-most-magic-numbers-from-mwifiex_process_tdls_action_frame.patch
 mwifiex-delete-unused-mwifiex_get_intf_num.patch
 kvm-svm-override-default-mmio-mask-if-memory-encryption-is-enabled.patch
 kvm-check-for-a-bad-hva-before-dropping-into-the-ghc-slow-path.patch
+sched-fair-optimize-update_blocked_averages.patch
+sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch
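
The detail that makes the first patch above correct is easy to miss: update_blocked_averages() now deletes entries from rq->leaf_cfs_rq_list while it is still walking that same list, so the plain list_for_each_entry_rcu() iterator is replaced by list_for_each_entry_safe(), which caches the next element before the loop body can unlink the current one. The sketch below is a minimal userspace illustration of that pattern only; it is not taken from the queued patches, and struct node, the for_each_node*() macros, and the decayed flag are invented stand-ins for the kernel's doubly linked list helpers and cfs_rq_is_decayed().

#include <stdio.h>
#include <stdlib.h>

struct node {
        int decayed;                    /* stand-in for cfs_rq_is_decayed() */
        struct node *next;
};

/* Plain iterator: reads n->next after the body may have freed n. */
#define for_each_node(n, head) \
        for ((n) = (head); (n); (n) = (n)->next)

/* "Safe" iterator: cache the successor first, like list_for_each_entry_safe(). */
#define for_each_node_safe(n, tmp, head) \
        for ((n) = (head); (n) && ((tmp) = (n)->next, 1); (n) = (tmp))

int main(void)
{
        struct node *head = NULL, *n, *tmp, **link;
        int i;

        /* Build a small list; mark every other node as "fully decayed". */
        for (i = 0; i < 6; i++) {
                n = malloc(sizeof(*n));
                n->decayed = (i % 2 == 0);
                n->next = head;
                head = n;
        }

        /*
         * Walk the list and drop decayed nodes as we go, the same shape as
         * update_blocked_averages() calling list_del_leaf_cfs_rq() from
         * inside the loop. Only the safe iterator tolerates this.
         */
        link = &head;
        for_each_node_safe(n, tmp, head) {
                if (n->decayed) {
                        *link = n->next;        /* unlink the current node */
                        free(n);                /* fine: successor already cached in tmp */
                } else {
                        link = &n->next;
                }
        }

        for_each_node(n, head)
                printf("kept node, decayed=%d\n", n->decayed);

        return 0;
}

The same constraint explains why print_cfs_stats() also moves to the _safe variant in that patch even though it only reads the list: the old for_each_leaf_cfs_rq() macro is removed outright, so every remaining walker uses the deletion-tolerant iterator.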