From: Greg Kroah-Hartman
Date: Wed, 19 Jul 2017 09:17:35 +0000 (+0200)
Subject: 4.9-stable patches
X-Git-Tag: v4.12.3~6
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d27eef41f47263420e7f04b627b567e1b4a93d79;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
        sched-fair-cpumask-export-for_each_cpu_wrap.patch
        sched-topology-optimize-build_group_mask.patch
---

diff --git a/queue-4.9/sched-fair-cpumask-export-for_each_cpu_wrap.patch b/queue-4.9/sched-fair-cpumask-export-for_each_cpu_wrap.patch
new file mode 100644
index 00000000000..3c7ccb9be3b
--- /dev/null
+++ b/queue-4.9/sched-fair-cpumask-export-for_each_cpu_wrap.patch
@@ -0,0 +1,184 @@
+From c6508a39640b9a27fc2bc10cb708152672c82045 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra
+Date: Fri, 14 Apr 2017 14:20:05 +0200
+Subject: sched/fair, cpumask: Export for_each_cpu_wrap()
+
+From: Peter Zijlstra
+
+commit c6508a39640b9a27fc2bc10cb708152672c82045 upstream.
+
+commit c743f0a5c50f2fcbc628526279cfa24f3dabe182 upstream.
+
+More users for for_each_cpu_wrap() have appeared. Promote the construct
+to generic cpumask interface.
+
+The implementation is slightly modified to reduce arguments.
+
+Signed-off-by: Peter Zijlstra (Intel)
+Cc: Lauro Ramos Venancio
+Cc: Linus Torvalds
+Cc: Mike Galbraith
+Cc: Peter Zijlstra
+Cc: Rik van Riel
+Cc: Thomas Gleixner
+Cc: lwang@redhat.com
+Link: http://lkml.kernel.org/r/20170414122005.o35me2h5nowqkxbv@hirez.programming.kicks-ass.net
+Signed-off-by: Ingo Molnar
+Signed-off-by: Mel Gorman
+Signed-off-by: Greg Kroah-Hartman
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/cpumask.h |   17 +++++++++++++++++
+ kernel/sched/fair.c     |   45 ++++-----------------------------------------
+ lib/cpumask.c           |   32 ++++++++++++++++++++++++++++++++
+ 3 files changed, 53 insertions(+), 41 deletions(-)
+
+--- a/include/linux/cpumask.h
++++ b/include/linux/cpumask.h
+@@ -236,6 +236,23 @@ unsigned int cpumask_local_spread(unsign
+               (cpu) = cpumask_next_zero((cpu), (mask)),       \
+               (cpu) < nr_cpu_ids;)
+ 
++extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
++
++/**
++ * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
++ * @cpu: the (optionally unsigned) integer iterator
++ * @mask: the cpumask poiter
++ * @start: the start location
++ *
++ * The implementation does not assume any bit in @mask is set (including @start).
++ *
++ * After the loop, cpu is >= nr_cpu_ids.
++ */
++#define for_each_cpu_wrap(cpu, mask, start)                                   \
++      for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false);      \
++           (cpu) < nr_cpumask_bits;                                           \
++           (cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
++
+ /**
+  * for_each_cpu_and - iterate over every cpu in both masks
+  * @cpu: the (optionally unsigned) integer iterator
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -5310,43 +5310,6 @@ find_idlest_cpu(struct sched_group *grou
+       return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
+ }
+ 
+-/*
+- * Implement a for_each_cpu() variant that starts the scan at a given cpu
+- * (@start), and wraps around.
+- *
+- * This is used to scan for idle CPUs; such that not all CPUs looking for an
+- * idle CPU find the same CPU. The down-side is that tasks tend to cycle
+- * through the LLC domain.
+- *
+- * Especially tbench is found sensitive to this.
+- */
+-
+-static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *wrapped)
+-{
+-      int next;
+-
+-again:
+-      next = find_next_bit(cpumask_bits(mask), nr_cpumask_bits, n+1);
+-
+-      if (*wrapped) {
+-              if (next >= start)
+-                      return nr_cpumask_bits;
+-      } else {
+-              if (next >= nr_cpumask_bits) {
+-                      *wrapped = 1;
+-                      n = -1;
+-                      goto again;
+-              }
+-      }
+-
+-      return next;
+-}
+-
+-#define for_each_cpu_wrap(cpu, mask, start, wrap)                             \
+-      for ((wrap) = 0, (cpu) = (start)-1;                                     \
+-              (cpu) = cpumask_next_wrap((cpu), (mask), (start), &(wrap)),     \
+-              (cpu) < nr_cpumask_bits; )
+-
+ #ifdef CONFIG_SCHED_SMT
+ 
+ static inline void set_idle_cores(int cpu, int val)
+@@ -5406,14 +5369,14 @@ unlock:
+ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+ {
+       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+-      int core, cpu, wrap;
++      int core, cpu;
+ 
+       if (!test_idle_cores(target, false))
+               return -1;
+ 
+       cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p));
+ 
+-      for_each_cpu_wrap(core, cpus, target, wrap) {
++      for_each_cpu_wrap(core, cpus, target) {
+               bool idle = true;
+ 
+               for_each_cpu(cpu, cpu_smt_mask(core)) {
+@@ -5476,7 +5439,7 @@ static int select_idle_cpu(struct task_s
+       u64 avg_cost, avg_idle = this_rq()->avg_idle;
+       u64 time, cost;
+       s64 delta;
+-      int cpu, wrap;
++      int cpu;
+ 
+       this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+       if (!this_sd)
+@@ -5493,7 +5456,7 @@ static int select_idle_cpu(struct task_s
+ 
+       time = local_clock();
+ 
+-      for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
++      for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+               if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+                       continue;
+               if (idle_cpu(cpu))
+--- a/lib/cpumask.c
++++ b/lib/cpumask.c
+@@ -43,6 +43,38 @@ int cpumask_any_but(const struct cpumask
+ }
+ EXPORT_SYMBOL(cpumask_any_but);
+ 
++/**
++ * cpumask_next_wrap - helper to implement for_each_cpu_wrap
++ * @n: the cpu prior to the place to search
++ * @mask: the cpumask pointer
++ * @start: the start point of the iteration
++ * @wrap: assume @n crossing @start terminates the iteration
++ *
++ * Returns >= nr_cpu_ids on completion
++ *
++ * Note: the @wrap argument is required for the start condition when
++ * we cannot assume @start is set in @mask.
++ */
++int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
++{
++      int next;
++
++again:
++      next = cpumask_next(n, mask);
++
++      if (wrap && n < start && next >= start) {
++              return nr_cpumask_bits;
++
++      } else if (next >= nr_cpumask_bits) {
++              wrap = true;
++              n = -1;
++              goto again;
++      }
++
++      return next;
++}
++EXPORT_SYMBOL(cpumask_next_wrap);
++
+ /* These are not inline because of header tangles. */
+ #ifdef CONFIG_CPUMASK_OFFSTACK
+ /**
diff --git a/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch b/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch
index d152b9e9512..8bd7202a14b 100644
--- a/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch
+++ b/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch
@@ -73,7 +73,7 @@ Signed-off-by: Greg Kroah-Hartman
  * range.
@@ -6120,11 +6123,24 @@ static void build_group_mask(struct sche
-      for_each_cpu(i, span) {
+      for_each_cpu(i, sg_span) {
       sibling = *per_cpu_ptr(sdd->sd, i);
-      if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+
diff --git a/queue-4.9/sched-topology-optimize-build_group_mask.patch b/queue-4.9/sched-topology-optimize-build_group_mask.patch
new file mode 100644
index 00000000000..f29cf9c5c8f
--- /dev/null
+++ b/queue-4.9/sched-topology-optimize-build_group_mask.patch
@@ -0,0 +1,46 @@
+From f32d782e31bf079f600dcec126ed117b0577e85c Mon Sep 17 00:00:00 2001
+From: Lauro Ramos Venancio
+Date: Thu, 20 Apr 2017 16:51:40 -0300
+Subject: sched/topology: Optimize build_group_mask()
+
+From: Lauro Ramos Venancio
+
+commit f32d782e31bf079f600dcec126ed117b0577e85c upstream.
+
+The group mask is always used in intersection with the group CPUs. So,
+when building the group mask, we don't have to care about CPUs that are
+not part of the group.
+
+Signed-off-by: Lauro Ramos Venancio
+Signed-off-by: Peter Zijlstra (Intel)
+Cc: Linus Torvalds
+Cc: Mike Galbraith
+Cc: Peter Zijlstra
+Cc: Thomas Gleixner
+Cc: lwang@redhat.com
+Cc: riel@redhat.com
+Link: http://lkml.kernel.org/r/1492717903-5195-2-git-send-email-lvenanci@redhat.com
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ kernel/sched/core.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6113,12 +6113,12 @@ enum s_alloc {
+  */
+ static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+ {
+-      const struct cpumask *span = sched_domain_span(sd);
++      const struct cpumask *sg_span = sched_group_cpus(sg);
+       struct sd_data *sdd = sd->private;
+       struct sched_domain *sibling;
+       int i;
+ 
+-      for_each_cpu(i, span) {
++      for_each_cpu(i, sg_span) {
+               sibling = *per_cpu_ptr(sdd->sd, i);
+               if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+                       continue;
diff --git a/queue-4.9/series b/queue-4.9/series
index 9abce79e92b..edbd004b6c1 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -59,7 +59,9 @@ crypto-sha1-ssse3-disable-avx2.patch
 crypto-caam-properly-set-iv-after-en-de-crypt.patch
 crypto-caam-fix-signals-handling.patch
 revert-sched-core-optimize-sched_smt.patch
+sched-fair-cpumask-export-for_each_cpu_wrap.patch
 sched-topology-fix-building-of-overlapping-sched-groups.patch
+sched-topology-optimize-build_group_mask.patch
 sched-topology-fix-overlapping-sched_group_mask.patch
 pm-wakeirq-convert-to-srcu.patch
 pm-qos-return-einval-for-bogus-strings.patch
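
The first queued patch turns for_each_cpu_wrap() into a generic cpumask iterator: the scan starts at @start, wraps past the end of the mask, visits every set CPU exactly once, and leaves the iterator >= nr_cpu_ids when done. The wrap state is carried in cpumask_next_wrap()'s bool argument instead of a caller-side variable, which is why select_idle_core() and select_idle_cpu() can drop their local 'wrap'. As a rough, self-contained illustration of that iteration order - not kernel code, and using invented names (NBITS, mask_next, mask_next_wrap, for_each_bit_wrap) - the same scan over a plain bitmask looks like this:

/*
 * Userspace sketch of the wrap-around scan provided by the new
 * for_each_cpu_wrap()/cpumask_next_wrap() interface.  All names here
 * are invented for the illustration and operate on a plain bitmask.
 */
#include <stdbool.h>
#include <stdio.h>

#define NBITS 16

/* Next set bit strictly after position n, or NBITS if there is none. */
static int mask_next(int n, unsigned int mask)
{
        for (int bit = n + 1; bit < NBITS; bit++)
                if (mask & (1u << bit))
                        return bit;
        return NBITS;
}

/* Mirrors cpumask_next_wrap(): wrap around once, stop after crossing @start. */
static int mask_next_wrap(int n, unsigned int mask, int start, bool wrap)
{
        int next;

again:
        next = mask_next(n, mask);

        if (wrap && n < start && next >= start)
                return NBITS;

        if (next >= NBITS) {
                wrap = true;
                n = -1;
                goto again;
        }

        return next;
}

/* Mirrors for_each_cpu_wrap(): first call with wrap=false, then wrap=true. */
#define for_each_bit_wrap(bit, mask, start)                               \
        for ((bit) = mask_next_wrap((start) - 1, (mask), (start), false); \
             (bit) < NBITS;                                               \
             (bit) = mask_next_wrap((bit), (mask), (start), true))

int main(void)
{
        unsigned int mask = 0x2c05;     /* bits 0, 2, 10, 11, 13 set */
        int bit;

        for_each_bit_wrap(bit, mask, 9)
                printf("%d ", bit);     /* prints: 10 11 13 0 2 */
        printf("\n");

        return 0;
}

Starting the scan at the target CPU is what keeps concurrent idle-CPU searches from all converging on the same low-numbered CPU, which is why the construct existed in fair.c in the first place. The second queued patch is a separate point: since the mask built by build_group_mask() is only ever used in intersection with the group's own CPUs, iterating over sched_group_cpus(sg) instead of the full sched_domain_span(sd) leaves the effective result unchanged while visiting fewer CPUs.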