+++ /dev/null
-From 7edaeb6841dfb27e362288ab8466ebdc4972e867 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Tue, 15 Aug 2017 09:50:13 +0200
-Subject: kernel/watchdog: Prevent false positives with turbo modes
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 7edaeb6841dfb27e362288ab8466ebdc4972e867 upstream.
-
-The hardlockup detector on x86 uses a performance counter based on unhalted
-CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the
-performance counter period, so the hrtimer should fire 2-3 times before the
-performance counter NMI fires. The NMI code checks whether the hrtimer
-fired since the last invocation. If not, it assumes a hard lockup.
-
-The calculation of those periods is based on the nominal CPU
-frequency. Turbo modes increase the CPU clock frequency and therefore
-shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x
-nominal frequency) the perf/NMI period is shorter than the hrtimer period
-which leads to false positives.
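-
-To put numbers on that (assuming the default watchdog_thresh of 10
-seconds, which is not stated above):
-
-    perf/NMI period (nominal):  10s worth of unhalted CPU cycles
-    hrtimer period:             10 * 2 / 5 = 4s
-
-At nominal frequency the hrtimer fires 2-3 times per NMI period, but
-at 3x turbo the counter overflows after only ~3.3s of wall time, i.e.
-before the 4s hrtimer period has elapsed, so the NMI can observe no
-hrtimer progress and report a false positive.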
-
-A simple fix would be to shorten the hrtimer period, but that comes with
-the side effect of more frequent hrtimer and softlockup thread wakeups,
-which is not desired.
-
-Implement a low pass filter, which checks the perf/NMI period against
-kernel time. If the perf/NMI fires before 4/5 of the watchdog period has
-elapsed then the event is ignored and postponed to the next perf/NMI.
-
-That solves the problem and avoids the overhead of shorter hrtimer periods
-and more frequent softlockup thread wakeups.
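-
-With the same example numbers the filter threshold is 2 * 4s = 8s,
-i.e. 4/5 of the 10 second watchdog period. An early turbo NMI at
-~3.3s is ignored and the stored timestamp is left untouched, so the
-deltas accumulate and the hardlockup check still runs once at least
-8s of kernel time have passed.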
-
-Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector")
-Reported-and-tested-by: Kan Liang <Kan.liang@intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: dzickus@redhat.com
-Cc: prarit@redhat.com
-Cc: ak@linux.intel.com
-Cc: babu.moger@oracle.com
-Cc: peterz@infradead.org
-Cc: eranian@google.com
-Cc: acme@redhat.com
-Cc: atomlin@redhat.com
-Cc: akpm@linux-foundation.org
-Cc: torvalds@linux-foundation.org
-Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- arch/x86/Kconfig | 1
- include/linux/nmi.h | 8 ++++++
- kernel/watchdog.c | 1
- kernel/watchdog_hld.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
- lib/Kconfig.debug | 7 +++++
- 5 files changed, 76 insertions(+)
-
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -74,6 +74,7 @@ config X86
- select GENERIC_STRNCPY_FROM_USER
- select GENERIC_STRNLEN_USER
- select GENERIC_TIME_VSYSCALL
-+ select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
- select HAVE_ACPI_APEI if ACPI
- select HAVE_ACPI_APEI_NMI if ACPI
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
---- a/include/linux/nmi.h
-+++ b/include/linux/nmi.h
-@@ -118,6 +118,14 @@ extern int sysctl_hardlockup_all_cpu_bac
- #define sysctl_softlockup_all_cpu_backtrace 0
- #define sysctl_hardlockup_all_cpu_backtrace 0
- #endif
-+
-+#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
-+ defined(CONFIG_HARDLOCKUP_DETECTOR)
-+void watchdog_update_hrtimer_threshold(u64 period);
-+#else
-+static inline void watchdog_update_hrtimer_threshold(u64 period) { }
-+#endif
-+
- extern bool is_hardlockup(void);
- struct ctl_table;
- extern int proc_watchdog(struct ctl_table *, int ,
---- a/kernel/watchdog.c
-+++ b/kernel/watchdog.c
-@@ -158,6 +158,7 @@ static void set_sample_period(void)
- * hardlockup detector generates a warning
- */
- sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
-+ watchdog_update_hrtimer_threshold(sample_period);
- }
-
- /* Commands for resetting the watchdog */
---- a/kernel/watchdog_hld.c
-+++ b/kernel/watchdog_hld.c
-@@ -68,6 +68,62 @@ void touch_nmi_watchdog(void)
- }
- EXPORT_SYMBOL(touch_nmi_watchdog);
-
-+#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
-+static DEFINE_PER_CPU(ktime_t, last_timestamp);
-+static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
-+static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
-+
-+void watchdog_update_hrtimer_threshold(u64 period)
-+{
-+ /*
-+ * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
-+ *
-+ * So it runs effectively with 2.5 times the rate of the NMI
-+ * watchdog. That means the hrtimer should fire 2-3 times before
-+ * the NMI watchdog expires. The NMI watchdog on x86 is based on
-+ * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
-+ * might run way faster than expected and the NMI fires in a
-+ * smaller period than the one deduced from the nominal CPU
-+ * frequency. Depending on the Turbo-Mode factor this might be fast
-+ * enough to get the NMI period smaller than the hrtimer watchdog
-+ * period and trigger false positives.
-+ *
-+ * The sample threshold is used to check in the NMI handler whether
-+ * the minimum time between two NMI samples has elapsed. That
-+ * prevents false positives.
-+ *
-+ * Set this to 4/5 of the actual watchdog threshold period so the
-+ * hrtimer is guaranteed to fire at least once within the real
-+ * watchdog threshold.
-+ */
-+ watchdog_hrtimer_sample_threshold = period * 2;
-+}
-+
-+static bool watchdog_check_timestamp(void)
-+{
-+ ktime_t delta, now = ktime_get_mono_fast_ns();
-+
-+ delta = now - __this_cpu_read(last_timestamp);
-+ if (delta < watchdog_hrtimer_sample_threshold) {
-+ /*
-+ * If ktime is jiffies based, a stalled timer would prevent
-+ * jiffies from being incremented and the filter would look
-+ * at a stale timestamp and never trigger.
-+ */
-+ if (__this_cpu_inc_return(nmi_rearmed) < 10)
-+ return false;
-+ }
-+ __this_cpu_write(nmi_rearmed, 0);
-+ __this_cpu_write(last_timestamp, now);
-+ return true;
-+}
-+#else
-+static inline bool watchdog_check_timestamp(void)
-+{
-+ return true;
-+}
-+#endif
-+
- static struct perf_event_attr wd_hw_attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
-@@ -92,6 +148,9 @@ static void watchdog_overflow_callback(s
- return;
- }
-
-+ if (!watchdog_check_timestamp())
-+ return;
-+
- /* check for a hardlockup
- * This is done by making sure our timer interrupt
- * is incrementing. The timer interrupt should have
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -325,6 +325,13 @@ config SECTION_MISMATCH_WARN_ONLY
- If unsure, say Y.
-
- #
-+# Enables a timestamp based low pass filter to compensate for perf based
-+# hard lockup detection which runs too fast due to turbo modes.
-+#
-+config HARDLOCKUP_CHECK_TIMESTAMP
-+ bool
-+
-+#
- # Select this config option from the architecture Kconfig, if it
- # is preferred to always offer frame pointers as a config
- # option on the architecture (regardless of KERNEL_DEBUG):
--- /dev/null
+From 197e7e521384a23b9e585178f3f11c9fa08274b9 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sun, 20 Aug 2017 13:26:27 -0700
+Subject: Sanitize 'move_pages()' permission checks
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 197e7e521384a23b9e585178f3f11c9fa08274b9 upstream.
+
+The 'move_pages()' system call was introduced long long ago with the
+same permission checks as for sending a signal (except using
+CAP_SYS_NICE instead of CAP_KILL for the overriding capability).
+
+That turns out to not be a great choice - while the system call really
+only moves physical page allocations around (and you need other
+capabilities to do a lot of it), you can check the return value to map
+out some of the virtual address choices and defeat ASLR of a binary that
+still shares your uid.
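+
+As an illustration (not part of this patch), the probing primitive is
+roughly the following; page_is_mapped() is a hypothetical helper and
+libnuma's move_pages() wrapper is assumed:
+
+    #include <sys/types.h>
+    #include <numaif.h>     /* move_pages(); link with -lnuma */
+
+    /*
+     * Before this change any process with the same uid could ask
+     * which NUMA node backs a page in another process.  A status
+     * of -ENOENT vs. a node number (>= 0) leaks whether a resident
+     * page backs the address, which is enough to probe ASLR.
+     */
+    static int page_is_mapped(pid_t pid, void *addr)
+    {
+        void *pages[1] = { addr };
+        int status[1] = { 0 };
+
+        if (move_pages(pid, 1, pages, NULL, status, 0) < 0)
+            return -1;      /* EPERM once this patch is applied */
+        return status[0] >= 0;
+    }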
+
+So change the access checks to the more common 'ptrace_may_access()'
+model instead.
+
+This tightens the access checks for the uid, and also effectively
+changes the CAP_SYS_NICE check to CAP_SYS_PTRACE, but it's unlikely that
+anybody really _uses_ this legacy system call any more (we have better
+NUMA placement models these days), so I expect nobody to notice.
+
+Famous last words.
+
+Reported-by: Otto Ebeling <otto.ebeling@iki.fi>
+Acked-by: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/migrate.c | 13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -40,6 +40,7 @@
+ #include <linux/mmu_notifier.h>
+ #include <linux/page_idle.h>
+ #include <linux/page_owner.h>
++#include <linux/ptrace.h>
+
+ #include <asm/tlbflush.h>
+
+@@ -1663,7 +1664,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
+ const int __user *, nodes,
+ int __user *, status, int, flags)
+ {
+- const struct cred *cred = current_cred(), *tcred;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ int err;
+@@ -1687,14 +1687,9 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
+
+ /*
+ * Check if this process has the right to modify the specified
+- * process. The right exists if the process has administrative
+- * capabilities, superuser privileges or the same
+- * userid as the target process.
+- */
+- tcred = __task_cred(task);
+- if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
+- !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) &&
+- !capable(CAP_SYS_NICE)) {
++ * process. Use the regular "ptrace_may_access()" checks.
++ */
++ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
+ rcu_read_unlock();
+ err = -EPERM;
+ goto out;