+++ /dev/null
-From 7edaeb6841dfb27e362288ab8466ebdc4972e867 Mon Sep 17 00:00:00 2001
-From: Thomas Gleixner <tglx@linutronix.de>
-Date: Tue, 15 Aug 2017 09:50:13 +0200
-Subject: kernel/watchdog: Prevent false positives with turbo modes
-
-From: Thomas Gleixner <tglx@linutronix.de>
-
-commit 7edaeb6841dfb27e362288ab8466ebdc4972e867 upstream.
-
-The hardlockup detector on x86 uses a performance counter based on unhalted
-CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the
-performance counter period, so the hrtimer should fire 2-3 times before the
-performance counter NMI fires. The NMI code checks whether the hrtimer
-fired since the last invocation. If not, it assumes a hard lockup.
-
-The calculation of those periods is based on the nominal CPU
-frequency. Turbo modes increase the CPU clock frequency and therefore
-shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x
-nominal frequency) the perf/NMI period is shorter than the hrtimer period
-which leads to false positives.
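-
-To put numbers on that (assuming the default watchdog_thresh of 10
-seconds, which is not stated above):
-
-    perf/NMI period (nominal):  10s worth of unhalted CPU cycles
-    hrtimer period:             10 * 2 / 5 = 4s
-
-At nominal frequency the hrtimer fires 2-3 times per NMI period, but
-at 3x turbo the counter overflows after only ~3.3s of wall time, i.e.
-before the 4s hrtimer period has elapsed, so the NMI can observe no
-hrtimer progress and report a false positive.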
-
-A simple fix would be to shorten the hrtimer period, but that comes with
-the side effect of more frequent hrtimer and softlockup thread wakeups,
-which is not desired.
-
-Implement a low pass filter, which checks the perf/NMI period against
-kernel time. If the perf/NMI fires before 4/5 of the watchdog period has
-elapsed then the event is ignored and postponed to the next perf/NMI.
-
-That solves the problem and avoids the overhead of shorter hrtimer periods
-and more frequent softlockup thread wakeups.
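-
-With the same example numbers the filter threshold is 2 * 4s = 8s,
-i.e. 4/5 of the 10 second watchdog period. An early turbo NMI at
-~3.3s is ignored and the stored timestamp is left untouched, so the
-deltas accumulate and the hardlockup check still runs once at least
-8s of kernel time have passed.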
-
-Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector")
-Reported-and-tested-by: Kan Liang <Kan.liang@intel.com>
-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-Cc: dzickus@redhat.com
-Cc: prarit@redhat.com
-Cc: ak@linux.intel.com
-Cc: babu.moger@oracle.com
-Cc: peterz@infradead.org
-Cc: eranian@google.com
-Cc: acme@redhat.com
-Cc: atomlin@redhat.com
-Cc: akpm@linux-foundation.org
-Cc: torvalds@linux-foundation.org
-Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-
----
- arch/x86/Kconfig | 1
- include/linux/nmi.h | 8 ++++++
- kernel/watchdog.c | 1
- kernel/watchdog_hld.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
- lib/Kconfig.debug | 7 +++++
- 5 files changed, 76 insertions(+)
-
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -74,6 +74,7 @@ config X86
- select GENERIC_STRNCPY_FROM_USER
- select GENERIC_STRNLEN_USER
- select GENERIC_TIME_VSYSCALL
-+ select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
- select HAVE_ACPI_APEI if ACPI
- select HAVE_ACPI_APEI_NMI if ACPI
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB
---- a/include/linux/nmi.h
-+++ b/include/linux/nmi.h
-@@ -118,6 +118,14 @@ extern int sysctl_hardlockup_all_cpu_bac
- #define sysctl_softlockup_all_cpu_backtrace 0
- #define sysctl_hardlockup_all_cpu_backtrace 0
- #endif
-+
-+#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
-+ defined(CONFIG_HARDLOCKUP_DETECTOR)
-+void watchdog_update_hrtimer_threshold(u64 period);
-+#else
-+static inline void watchdog_update_hrtimer_threshold(u64 period) { }
-+#endif
-+
- extern bool is_hardlockup(void);
- struct ctl_table;
- extern int proc_watchdog(struct ctl_table *, int ,
---- a/kernel/watchdog.c
-+++ b/kernel/watchdog.c
-@@ -158,6 +158,7 @@ static void set_sample_period(void)
- * hardlockup detector generates a warning
- */
- sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
-+ watchdog_update_hrtimer_threshold(sample_period);
- }
-
- /* Commands for resetting the watchdog */
---- a/kernel/watchdog_hld.c
-+++ b/kernel/watchdog_hld.c
-@@ -68,6 +68,62 @@ void touch_nmi_watchdog(void)
- }
- EXPORT_SYMBOL(touch_nmi_watchdog);
-
-+#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
-+static DEFINE_PER_CPU(ktime_t, last_timestamp);
-+static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
-+static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
-+
-+void watchdog_update_hrtimer_threshold(u64 period)
-+{
-+ /*
-+ * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
-+ *
-+ * So it runs effectively with 2.5 times the rate of the NMI
-+ * watchdog. That means the hrtimer should fire 2-3 times before
-+ * the NMI watchdog expires. The NMI watchdog on x86 is based on
-+ * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
-+ * might run way faster than expected and the NMI fires in a
-+ * smaller period than the one deduced from the nominal CPU
-+ * frequency. Depending on the Turbo-Mode factor this might be fast
-+ * enough to get the NMI period smaller than the hrtimer watchdog
-+ * period and trigger false positives.
-+ *
-+ * The sample threshold is used to check in the NMI handler whether
-+ * the minimum time between two NMI samples has elapsed. That
-+ * prevents false positives.
-+ *
-+ * Set this to 4/5 of the actual watchdog threshold period so the
-+ * hrtimer is guaranteed to fire at least once within the real
-+ * watchdog threshold.
-+ */
-+ watchdog_hrtimer_sample_threshold = period * 2;
-+}
-+
-+static bool watchdog_check_timestamp(void)
-+{
-+ ktime_t delta, now = ktime_get_mono_fast_ns();
-+
-+ delta = now - __this_cpu_read(last_timestamp);
-+ if (delta < watchdog_hrtimer_sample_threshold) {
-+ /*
-+ * If ktime is jiffies based, a stalled timer would prevent
-+ * jiffies from being incremented and the filter would look
-+ * at a stale timestamp and never trigger.
-+ */
-+ if (__this_cpu_inc_return(nmi_rearmed) < 10)
-+ return false;
-+ }
-+ __this_cpu_write(nmi_rearmed, 0);
-+ __this_cpu_write(last_timestamp, now);
-+ return true;
-+}
-+#else
-+static inline bool watchdog_check_timestamp(void)
-+{
-+ return true;
-+}
-+#endif
-+
- static struct perf_event_attr wd_hw_attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
-@@ -92,6 +148,9 @@ static void watchdog_overflow_callback(s
- return;
- }
-
-+ if (!watchdog_check_timestamp())
-+ return;
-+
- /* check for a hardlockup
- * This is done by making sure our timer interrupt
- * is incrementing. The timer interrupt should have
---- a/lib/Kconfig.debug
-+++ b/lib/Kconfig.debug
-@@ -325,6 +325,13 @@ config SECTION_MISMATCH_WARN_ONLY
- If unsure, say Y.
-
- #
-+# Enables a timestamp based low pass filter to compensate for perf based
-+# hard lockup detection which runs too fast due to turbo modes.
-+#
-+config HARDLOCKUP_CHECK_TIMESTAMP
-+ bool
-+
-+#
- # Select this config option from the architecture Kconfig, if it
- # is preferred to always offer frame pointers as a config
- # option on the architecture (regardless of KERNEL_DEBUG):
--- /dev/null
+From 197e7e521384a23b9e585178f3f11c9fa08274b9 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Sun, 20 Aug 2017 13:26:27 -0700
+Subject: Sanitize 'move_pages()' permission checks
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 197e7e521384a23b9e585178f3f11c9fa08274b9 upstream.
+
+The 'move_pages()' system call was introduced long long ago with the
+same permission checks as for sending a signal (except using
+CAP_SYS_NICE instead of CAP_KILL for the overriding capability).
+
+That turns out to not be a great choice - while the system call really
+only moves physical page allocations around (and you need other
+capabilities to do a lot of it), you can check the return value to map
+out some of the virtual address choices and defeat ASLR of a binary that
+still shares your uid.
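+
+As an illustration (not part of this patch), the probing primitive is
+roughly the following; page_is_mapped() is a hypothetical helper and
+libnuma's move_pages() wrapper is assumed:
+
+    #include <sys/types.h>
+    #include <numaif.h>     /* move_pages(); link with -lnuma */
+
+    /*
+     * Before this change any process with the same uid could ask
+     * which NUMA node backs a page in another process.  A status
+     * of -ENOENT vs. a node number (>= 0) leaks whether a resident
+     * page backs the address, which is enough to probe ASLR.
+     */
+    static int page_is_mapped(pid_t pid, void *addr)
+    {
+        void *pages[1] = { addr };
+        int status[1] = { 0 };
+
+        if (move_pages(pid, 1, pages, NULL, status, 0) < 0)
+            return -1;      /* EPERM once this patch is applied */
+        return status[0] >= 0;
+    }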
+
+So change the access checks to the more common 'ptrace_may_access()'
+model instead.
+
+This tightens the access checks for the uid, and also effectively
+changes the CAP_SYS_NICE check to CAP_SYS_PTRACE, but it's unlikely that
+anybody really _uses_ this legacy system call any more (we have better
+NUMA placement models these days), so I expect nobody to notice.
+
+Famous last words.
+
+Reported-by: Otto Ebeling <otto.ebeling@iki.fi>
+Acked-by: Eric W. Biederman <ebiederm@xmission.com>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/migrate.c | 13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -40,6 +40,7 @@
+ #include <linux/mmu_notifier.h>
+ #include <linux/page_idle.h>
+ #include <linux/page_owner.h>
++#include <linux/ptrace.h>
+
+ #include <asm/tlbflush.h>
+
+@@ -1663,7 +1664,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
+ const int __user *, nodes,
+ int __user *, status, int, flags)
+ {
+- const struct cred *cred = current_cred(), *tcred;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ int err;
+@@ -1687,14 +1687,9 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
+
+ /*
+ * Check if this process has the right to modify the specified
+- * process. The right exists if the process has administrative
+- * capabilities, superuser privileges or the same
+- * userid as the target process.
+- */
+- tcred = __task_cred(task);
+- if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) &&
+- !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) &&
+- !capable(CAP_SYS_NICE)) {
++ * process. Use the regular "ptrace_may_access()" checks.
++ */
++ if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
+ rcu_read_unlock();
+ err = -EPERM;
+ goto out;