4.19-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 3 Mar 2020 15:43:14 +0000 (16:43 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Tue, 3 Mar 2020 15:43:14 +0000 (16:43 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 3 Mar 2020 15:43:14 +0000 (16:43 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 3 Mar 2020 15:43:14 +0000 (16:43 +0100)
diff --git a/queue-4.19/sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch b/queue-4.19/sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch

new file mode 100644 (file)

index 0000000..3d4ef96
--- /dev/null
+++ b/queue-4.19/sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch
@@ -0,0 +1,127 @@
+From 039ae8bcf7a5f4476f4487e6bf816885fb3fb617 Mon Sep 17 00:00:00 2001
+From: Vincent Guittot <vincent.guittot@linaro.org>
+Date: Wed, 6 Feb 2019 17:14:22 +0100
+Subject: sched/fair: Fix O(nr_cgroups) in the load balancing path
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+commit 039ae8bcf7a5f4476f4487e6bf816885fb3fb617 upstream.
+
+This re-applies the commit reverted here:
+
+  commit c40f7d74c741 ("sched/fair: Fix infinite loop in update_blocked_averages() by reverting a9e7f6544b9c")
+
+I.e. now that cfs_rq can be safely removed/added in the list, we can re-apply:
+
+ commit a9e7f6544b9c ("sched/fair: Fix O(nr_cgroups) in load balance path")
+
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: sargun@sargun.me
+Cc: tj@kernel.org
+Cc: xiexiuqi@huawei.com
+Cc: xiezhipeng1@huawei.com
+Link: https://lkml.kernel.org/r/1549469662-13614-3-git-send-email-vincent.guittot@linaro.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Vishnu Rangayyan <vishnu.rangayyan@apple.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/fair.c |   43 ++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 34 insertions(+), 9 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -375,9 +375,10 @@ static inline void assert_list_leaf_cfs_
+       SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
+ }
+ 
+-/* Iterate through all cfs_rq's on a runqueue in bottom-up order */
+-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
+-      list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
++/* Iterate through all leaf cfs_rq's on a runqueue */
++#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)                    \
++      list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list,    \
++                               leaf_cfs_rq_list)
+ 
+ /* Do the two (enqueued) entities belong to the same group ? */
+ static inline struct cfs_rq *
+@@ -474,8 +475,8 @@ static inline void assert_list_leaf_cfs_
+ {
+ }
+ 
+-#define for_each_leaf_cfs_rq(rq, cfs_rq)      \
+-              for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
++#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)    \
++              for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
+ 
+ static inline struct sched_entity *parent_entity(struct sched_entity *se)
+ {
+@@ -7461,10 +7462,27 @@ static inline bool others_have_blocked(s
+ 
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ 
++static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
++{
++      if (cfs_rq->load.weight)
++              return false;
++
++      if (cfs_rq->avg.load_sum)
++              return false;
++
++      if (cfs_rq->avg.util_sum)
++              return false;
++
++      if (cfs_rq->avg.runnable_load_sum)
++              return false;
++
++      return true;
++}
++
+ static void update_blocked_averages(int cpu)
+ {
+       struct rq *rq = cpu_rq(cpu);
+-      struct cfs_rq *cfs_rq;
++      struct cfs_rq *cfs_rq, *pos;
+       const struct sched_class *curr_class;
+       struct rq_flags rf;
+       bool done = true;
+@@ -7476,7 +7494,7 @@ static void update_blocked_averages(int
+        * Iterates the task_group tree in a bottom up fashion, see
+        * list_add_leaf_cfs_rq() for details.
+        */
+-      for_each_leaf_cfs_rq(rq, cfs_rq) {
++      for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
+               struct sched_entity *se;
+ 
+               if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+@@ -7487,6 +7505,13 @@ static void update_blocked_averages(int
+               if (se && !skip_blocked_update(se))
+                       update_load_avg(cfs_rq_of(se), se, 0);
+ 
++              /*
++               * There can be a lot of idle CPU cgroups.  Don't let fully
++               * decayed cfs_rqs linger on the list.
++               */
++              if (cfs_rq_is_decayed(cfs_rq))
++                      list_del_leaf_cfs_rq(cfs_rq);
++
+               /* Don't need periodic decay once load/util_avg are null */
+               if (cfs_rq_has_blocked(cfs_rq))
+                       done = false;
+@@ -10272,10 +10297,10 @@ const struct sched_class fair_sched_clas
+ #ifdef CONFIG_SCHED_DEBUG
+ void print_cfs_stats(struct seq_file *m, int cpu)
+ {
+-      struct cfs_rq *cfs_rq;
++      struct cfs_rq *cfs_rq, *pos;
+ 
+       rcu_read_lock();
+-      for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
++      for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
+               print_cfs_rq(m, cpu, cfs_rq);
+       rcu_read_unlock();
+ }
diff --git a/queue-4.19/sched-fair-optimize-update_blocked_averages.patch b/queue-4.19/sched-fair-optimize-update_blocked_averages.patch

new file mode 100644 (file)

index 0000000..e44f618
--- /dev/null
+++ b/queue-4.19/sched-fair-optimize-update_blocked_averages.patch
@@ -0,0 +1,111 @@
+From 31bc6aeaab1d1de8959b67edbed5c7a4b3cdbe7c Mon Sep 17 00:00:00 2001
+From: Vincent Guittot <vincent.guittot@linaro.org>
+Date: Wed, 6 Feb 2019 17:14:21 +0100
+Subject: sched/fair: Optimize update_blocked_averages()
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+commit 31bc6aeaab1d1de8959b67edbed5c7a4b3cdbe7c upstream.
+
+Removing a cfs_rq from rq->leaf_cfs_rq_list can break the parent/child
+ordering of the list when it will be added back. In order to remove an
+empty and fully decayed cfs_rq, we must remove its children too, so they
+will be added back in the right order next time.
+
+With a normal decay of PELT, a parent will be empty and fully decayed
+if all children are empty and fully decayed too. In such a case, we just
+have to ensure that the whole branch will be added when a new task is
+enqueued. This is the default behavior since:
+
+  commit f6783319737f ("sched/fair: Fix insertion in rq->leaf_cfs_rq_list")
+
+In case of throttling, the PELT of throttled cfs_rq will not be updated
+whereas the parent will. This breaks the assumption made above unless we
+remove the children of a cfs_rq that is throttled. Then, they will be
+added back when unthrottled and a sched_entity will be enqueued.
+
+As throttled cfs_rq are now removed from the list, we can remove the
+associated test in update_blocked_averages().
+
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: sargun@sargun.me
+Cc: tj@kernel.org
+Cc: xiexiuqi@huawei.com
+Cc: xiezhipeng1@huawei.com
+Link: https://lkml.kernel.org/r/1549469662-13614-2-git-send-email-vincent.guittot@linaro.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Vishnu Rangayyan <vishnu.rangayyan@apple.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/fair.c |   26 +++++++++++++++++++++-----
+ 1 file changed, 21 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -353,6 +353,18 @@ static inline bool list_add_leaf_cfs_rq(
+ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+ {
+       if (cfs_rq->on_list) {
++              struct rq *rq = rq_of(cfs_rq);
++
++              /*
++               * With cfs_rq being unthrottled/throttled during an enqueue,
++               * it can happen that tmp_alone_branch points to a leaf that
++               * we finally want to del. In this case, tmp_alone_branch moves
++               * to the prev element but it will point to rq->leaf_cfs_rq_list
++               * at the end of the enqueue.
++               */
++              if (rq->tmp_alone_branch == &cfs_rq->leaf_cfs_rq_list)
++                      rq->tmp_alone_branch = cfs_rq->leaf_cfs_rq_list.prev;
++
+               list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+               cfs_rq->on_list = 0;
+       }
+@@ -4441,6 +4453,10 @@ static int tg_unthrottle_up(struct task_
+               /* adjust cfs_rq_clock_task() */
+               cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
+                                            cfs_rq->throttled_clock_task;
++
++              /* Add cfs_rq with already running entity in the list */
++              if (cfs_rq->nr_running >= 1)
++                      list_add_leaf_cfs_rq(cfs_rq);
+       }
+ 
+       return 0;
+@@ -4452,8 +4468,10 @@ static int tg_throttle_down(struct task_
+       struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+ 
+       /* group is entering throttled state, stop time */
+-      if (!cfs_rq->throttle_count)
++      if (!cfs_rq->throttle_count) {
+               cfs_rq->throttled_clock_task = rq_clock_task(rq);
++              list_del_leaf_cfs_rq(cfs_rq);
++      }
+       cfs_rq->throttle_count++;
+ 
+       return 0;
+@@ -4556,6 +4574,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cf
+                       break;
+       }
+ 
++      assert_list_leaf_cfs_rq(rq);
++
+       if (!se)
+               add_nr_running(rq, task_delta);
+ 
+@@ -7459,10 +7479,6 @@ static void update_blocked_averages(int
+       for_each_leaf_cfs_rq(rq, cfs_rq) {
+               struct sched_entity *se;
+ 
+-              /* throttled entities do not contribute to load */
+-              if (throttled_hierarchy(cfs_rq))
+-                      continue;
+-
+               if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+                       update_tg_load_avg(cfs_rq, 0);
+ 
diff --git a/queue-4.19/series b/queue-4.19/series

index 572452a55af9f412627bcf9dad23741894d83a9d..93671ab53cda7ca6ac12830258540d1c497914eb 100644 (file)
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -69,3 +69,5 @@ mwifiex-drop-most-magic-numbers-from-mwifiex_process_tdls_action_frame.patch
  mwifiex-delete-unused-mwifiex_get_intf_num.patch
  kvm-svm-override-default-mmio-mask-if-memory-encryption-is-enabled.patch
  kvm-check-for-a-bad-hva-before-dropping-into-the-ghc-slow-path.patch
+sched-fair-optimize-update_blocked_averages.patch
+sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 3 Mar 2020 15:43:14 +0000 (16:43 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 3 Mar 2020 15:43:14 +0000 (16:43 +0100)
queue-4.19/sched-fair-fix-o-nr_cgroups-in-the-load-balancing-path.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/sched-fair-optimize-update_blocked_averages.patch	[new file with mode: 0644]	patch \| blob
queue-4.19/series		patch \| blob \| blame \| history