]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 6 Jan 2018 08:45:48 +0000 (09:45 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 6 Jan 2018 08:45:48 +0000 (09:45 +0100)
added patches:
x86-cpu-always-show-current-cpu-frequency-in-proc-cpuinfo.patch
x86-cpu-avoid-unnecessary-ipis-in-arch_freq_get_on_cpu.patch

queue-4.14/series
queue-4.14/x86-cpu-always-show-current-cpu-frequency-in-proc-cpuinfo.patch [new file with mode: 0644]
queue-4.14/x86-cpu-avoid-unnecessary-ipis-in-arch_freq_get_on_cpu.patch [new file with mode: 0644]

index 9154c682fde7f5d211b8fbd3f371a299c3052018..8504ae5d45995a85866237899a94223bb245cfb7 100644 (file)
@@ -19,3 +19,5 @@ drm-i915-disable-dc-states-around-gmbus-on-glk.patch
 drm-i915-apply-display-wa-1183-on-skl-kbl-and-cfl.patch
 sunxi-rsb-include-of-based-modalias-in-device-uevent.patch
 fscache-fix-the-default-for-fscache_maybe_release_page.patch
+x86-cpu-avoid-unnecessary-ipis-in-arch_freq_get_on_cpu.patch
+x86-cpu-always-show-current-cpu-frequency-in-proc-cpuinfo.patch
diff --git a/queue-4.14/x86-cpu-always-show-current-cpu-frequency-in-proc-cpuinfo.patch b/queue-4.14/x86-cpu-always-show-current-cpu-frequency-in-proc-cpuinfo.patch
new file mode 100644 (file)
index 0000000..a2b28d5
--- /dev/null
@@ -0,0 +1,244 @@
+From 7d5905dc14a87805a59f3c5bf70173aac2bb18f8 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Wed, 15 Nov 2017 02:13:40 +0100
+Subject: x86 / CPU: Always show current CPU frequency in /proc/cpuinfo
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit 7d5905dc14a87805a59f3c5bf70173aac2bb18f8 upstream.
+
+After commit 890da9cf0983 (Revert "x86: do not use cpufreq_quick_get()
+for /proc/cpuinfo "cpu MHz"") the "cpu MHz" number in /proc/cpuinfo
+on x86 can be either the nominal CPU frequency (which is constant)
+or the frequency most recently requested by a scaling governor in
+cpufreq, depending on the cpufreq configuration.  That is somewhat
+inconsistent and is different from what it was before 4.13, so in
+order to restore the previous behavior, make it report the current
+CPU frequency like the scaling_cur_freq sysfs file in cpufreq.
+
+To that end, modify the /proc/cpuinfo implementation on x86 to use
+aperfmperf_snapshot_khz() to snapshot the APERF and MPERF feedback
+registers, if available, and use their values to compute the CPU
+frequency to be reported as "cpu MHz".
+
+However, do that carefully enough to avoid accumulating delays that
+lead to unacceptable access times for /proc/cpuinfo on systems with
+many CPUs.  Run aperfmperf_snapshot_khz() once on all CPUs
+asynchronously at the /proc/cpuinfo open time, add a single delay
+upfront (if necessary) at that point and simply compute the current
+frequency while running show_cpuinfo() for each individual CPU.
+
+Also, to avoid slowing down /proc/cpuinfo accesses too much, reduce
+the default delay between consecutive APERF and MPERF reads to 10 ms,
+which should be sufficient to get large enough numbers for the
+frequency computation in all cases.
+
+Fixes: 890da9cf0983 (Revert "x86: do not use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz"")
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/Makefile     |    2 -
+ arch/x86/kernel/cpu/aperfmperf.c |   74 +++++++++++++++++++++++++++------------
+ arch/x86/kernel/cpu/cpu.h        |    3 +
+ arch/x86/kernel/cpu/proc.c       |    6 ++-
+ fs/proc/cpuinfo.c                |    6 +++
+ include/linux/cpufreq.h          |    1 
+ 6 files changed, 68 insertions(+), 24 deletions(-)
+
+--- a/arch/x86/kernel/cpu/Makefile
++++ b/arch/x86/kernel/cpu/Makefile
+@@ -22,7 +22,7 @@ obj-y                        += common.o
+ obj-y                 += rdrand.o
+ obj-y                 += match.o
+ obj-y                 += bugs.o
+-obj-$(CONFIG_CPU_FREQ)        += aperfmperf.o
++obj-y                 += aperfmperf.o
+ obj-y                 += cpuid-deps.o
+ obj-$(CONFIG_PROC_FS) += proc.o
+--- a/arch/x86/kernel/cpu/aperfmperf.c
++++ b/arch/x86/kernel/cpu/aperfmperf.c
+@@ -14,6 +14,8 @@
+ #include <linux/percpu.h>
+ #include <linux/smp.h>
++#include "cpu.h"
++
+ struct aperfmperf_sample {
+       unsigned int    khz;
+       ktime_t time;
+@@ -24,7 +26,7 @@ struct aperfmperf_sample {
+ static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
+ #define APERFMPERF_CACHE_THRESHOLD_MS 10
+-#define APERFMPERF_REFRESH_DELAY_MS   20
++#define APERFMPERF_REFRESH_DELAY_MS   10
+ #define APERFMPERF_STALE_THRESHOLD_MS 1000
+ /*
+@@ -38,8 +40,6 @@ static void aperfmperf_snapshot_khz(void
+       u64 aperf, aperf_delta;
+       u64 mperf, mperf_delta;
+       struct aperfmperf_sample *s = this_cpu_ptr(&samples);
+-      ktime_t now = ktime_get();
+-      s64 time_delta = ktime_ms_delta(now, s->time);
+       unsigned long flags;
+       local_irq_save(flags);
+@@ -57,38 +57,68 @@ static void aperfmperf_snapshot_khz(void
+       if (mperf_delta == 0)
+               return;
+-      s->time = now;
++      s->time = ktime_get();
+       s->aperf = aperf;
+       s->mperf = mperf;
+-
+-      /* If the previous iteration was too long ago, discard it. */
+-      if (time_delta > APERFMPERF_STALE_THRESHOLD_MS)
+-              s->khz = 0;
+-      else
+-              s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
++      s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
+ }
+-unsigned int arch_freq_get_on_cpu(int cpu)
++static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
+ {
+-      s64 time_delta;
+-      unsigned int khz;
++      s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
++
++      /* Don't bother re-computing within the cache threshold time. */
++      if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
++              return true;
++
++      smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
++
++      /* Return false if the previous iteration was too long ago. */
++      return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
++}
++unsigned int aperfmperf_get_khz(int cpu)
++{
+       if (!cpu_khz)
+               return 0;
+       if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+               return 0;
+-      /* Don't bother re-computing within the cache threshold time. */
+-      time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu));
+-      khz = per_cpu(samples.khz, cpu);
+-      if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
+-              return khz;
++      aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
++      return per_cpu(samples.khz, cpu);
++}
+-      smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+-      khz = per_cpu(samples.khz, cpu);
+-      if (khz)
+-              return khz;
++void arch_freq_prepare_all(void)
++{
++      ktime_t now = ktime_get();
++      bool wait = false;
++      int cpu;
++
++      if (!cpu_khz)
++              return;
++
++      if (!static_cpu_has(X86_FEATURE_APERFMPERF))
++              return;
++
++      for_each_online_cpu(cpu)
++              if (!aperfmperf_snapshot_cpu(cpu, now, false))
++                      wait = true;
++
++      if (wait)
++              msleep(APERFMPERF_REFRESH_DELAY_MS);
++}
++
++unsigned int arch_freq_get_on_cpu(int cpu)
++{
++      if (!cpu_khz)
++              return 0;
++
++      if (!static_cpu_has(X86_FEATURE_APERFMPERF))
++              return 0;
++
++      if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
++              return per_cpu(samples.khz, cpu);
+       msleep(APERFMPERF_REFRESH_DELAY_MS);
+       smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -47,4 +47,7 @@ extern const struct cpu_dev *const __x86
+ extern void get_cpu_cap(struct cpuinfo_x86 *c);
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
++
++unsigned int aperfmperf_get_khz(int cpu);
++
+ #endif /* ARCH_X86_CPU_H */
+--- a/arch/x86/kernel/cpu/proc.c
++++ b/arch/x86/kernel/cpu/proc.c
+@@ -5,6 +5,8 @@
+ #include <linux/seq_file.h>
+ #include <linux/cpufreq.h>
++#include "cpu.h"
++
+ /*
+  *    Get CPU information for use by the procfs.
+  */
+@@ -78,9 +80,11 @@ static int show_cpuinfo(struct seq_file
+               seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
+       if (cpu_has(c, X86_FEATURE_TSC)) {
+-              unsigned int freq = cpufreq_quick_get(cpu);
++              unsigned int freq = aperfmperf_get_khz(cpu);
+               if (!freq)
++                      freq = cpufreq_quick_get(cpu);
++              if (!freq)
+                       freq = cpu_khz;
+               seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
+                          freq / 1000, (freq % 1000));
+--- a/fs/proc/cpuinfo.c
++++ b/fs/proc/cpuinfo.c
+@@ -1,12 +1,18 @@
+ // SPDX-License-Identifier: GPL-2.0
++#include <linux/cpufreq.h>
+ #include <linux/fs.h>
+ #include <linux/init.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
++__weak void arch_freq_prepare_all(void)
++{
++}
++
+ extern const struct seq_operations cpuinfo_op;
+ static int cpuinfo_open(struct inode *inode, struct file *file)
+ {
++      arch_freq_prepare_all();
+       return seq_open(file, &cpuinfo_op);
+ }
+--- a/include/linux/cpufreq.h
++++ b/include/linux/cpufreq.h
+@@ -917,6 +917,7 @@ static inline bool policy_has_boost_freq
+ }
+ #endif
++extern void arch_freq_prepare_all(void);
+ extern unsigned int arch_freq_get_on_cpu(int cpu);
+ /* the following are really really optional */
diff --git a/queue-4.14/x86-cpu-avoid-unnecessary-ipis-in-arch_freq_get_on_cpu.patch b/queue-4.14/x86-cpu-avoid-unnecessary-ipis-in-arch_freq_get_on_cpu.patch
new file mode 100644 (file)
index 0000000..d8bb623
--- /dev/null
@@ -0,0 +1,66 @@
+From b29c6ef7bb1257853c1e31616d84f55e561cf631 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Mon, 13 Nov 2017 02:15:39 +0100
+Subject: x86 / CPU: Avoid unnecessary IPIs in arch_freq_get_on_cpu()
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+commit b29c6ef7bb1257853c1e31616d84f55e561cf631 upstream.
+
+Even though aperfmperf_snapshot_khz() caches the samples.khz value to
+return if called again in a sufficiently short time, its caller,
+arch_freq_get_on_cpu(), still uses smp_call_function_single() to run it
+which may allow user space to trigger an IPI storm by reading from the
+scaling_cur_freq cpufreq sysfs file in a tight loop.
+
+To avoid that, move the decision on whether or not to return the cached
+samples.khz value to arch_freq_get_on_cpu().
+
+This change was part of commit 941f5f0f6ef5 ("x86: CPU: Fix up "cpu MHz"
+in /proc/cpuinfo"), but it was not the reason for the revert and it
+remains applicable.
+
+Fixes: 4815d3c56d1e (cpufreq: x86: Make scaling_cur_freq behave more as expected)
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Reviewed-by: WANG Chao <chao.wang@ucloud.cn>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/aperfmperf.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/aperfmperf.c
++++ b/arch/x86/kernel/cpu/aperfmperf.c
+@@ -42,10 +42,6 @@ static void aperfmperf_snapshot_khz(void
+       s64 time_delta = ktime_ms_delta(now, s->time);
+       unsigned long flags;
+-      /* Don't bother re-computing within the cache threshold time. */
+-      if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
+-              return;
+-
+       local_irq_save(flags);
+       rdmsrl(MSR_IA32_APERF, aperf);
+       rdmsrl(MSR_IA32_MPERF, mperf);
+@@ -74,6 +70,7 @@ static void aperfmperf_snapshot_khz(void
+ unsigned int arch_freq_get_on_cpu(int cpu)
+ {
++      s64 time_delta;
+       unsigned int khz;
+       if (!cpu_khz)
+@@ -82,6 +79,12 @@ unsigned int arch_freq_get_on_cpu(int cp
+       if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+               return 0;
++      /* Don't bother re-computing within the cache threshold time. */
++      time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu));
++      khz = per_cpu(samples.khz, cpu);
++      if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
++              return khz;
++
+       smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+       khz = per_cpu(samples.khz, cpu);
+       if (khz)