--- /dev/null
+From 96cfe2c0fd23ea7c2368d14f769d287e7ae1082e Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Fri, 12 Mar 2021 21:08:06 -0800
+Subject: mm/madvise: replace ptrace attach requirement for process_madvise
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit 96cfe2c0fd23ea7c2368d14f769d287e7ae1082e upstream.
+
+process_madvise currently requires ptrace attach capability.
+PTRACE_MODE_ATTACH gives one process complete control over another
+process. It effectively removes the security boundary between the two
+processes (in one direction). Granting ptrace attach capability even to a
+system process is considered dangerous since it creates an attack surface.
+This severely limits the usage of this API.
+
+The operations process_madvise can perform do not affect the correctness
+of the operation of the target process; they only affect where the data is
+physically located (and therefore, how fast it can be accessed). What we
+want is the ability for one process to influence another process in order
+to optimize performance across the entire system while leaving the
+security boundary intact.
+
+Replace PTRACE_MODE_ATTACH with a combination of PTRACE_MODE_READ and
+CAP_SYS_NICE: PTRACE_MODE_READ prevents leaking ASLR metadata, and
+CAP_SYS_NICE gates influencing process performance.
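+
+As an illustration (not part of this patch): with CAP_SYS_NICE and
+PTRACE_MODE_READ access to the target, a userspace caller could issue
+hints roughly like the sketch below. It assumes headers that define
+__NR_process_madvise and MADV_COLD, and a pidfd for the target
+(e.g. from pidfd_open(2)); hint_cold is a made-up helper name.
+
+  #define _GNU_SOURCE
+  #include <sys/mman.h>
+  #include <sys/syscall.h>
+  #include <sys/uio.h>
+  #include <unistd.h>
+
+  /* Hint that a range of the target's memory is cold. */
+  static long hint_cold(int pidfd, void *addr, size_t len)
+  {
+          struct iovec vec = { .iov_base = addr, .iov_len = len };
+
+          /* flags must be 0; only non-destructive hints are accepted */
+          return syscall(__NR_process_madvise, pidfd, &vec, 1,
+                         MADV_COLD, 0);
+  }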
+
+Link: https://lkml.kernel.org/r/20210303185807.2160264-1-surenb@google.com
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Acked-by: David Rientjes <rientjes@google.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Jeff Vander Stoep <jeffv@google.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tim Murray <timmurray@google.com>
+Cc: Florian Weimer <fweimer@redhat.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: James Morris <jmorris@namei.org>
+Cc: <stable@vger.kernel.org> [5.10+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/madvise.c | 13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -1202,12 +1202,22 @@ SYSCALL_DEFINE5(process_madvise, int, pi
+ goto release_task;
+ }
+
+- mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
++ /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
++ mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+ if (IS_ERR_OR_NULL(mm)) {
+ ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ goto release_task;
+ }
+
++ /*
++ * Require CAP_SYS_NICE for influencing process performance. Note that
++ * only non-destructive hints are currently supported.
++ */
++ if (!capable(CAP_SYS_NICE)) {
++ ret = -EPERM;
++ goto release_mm;
++ }
++
+ total_len = iov_iter_count(&iter);
+
+ while (iov_iter_count(&iter)) {
+@@ -1222,6 +1232,7 @@ SYSCALL_DEFINE5(process_madvise, int, pi
+ if (ret == 0)
+ ret = total_len - iov_iter_count(&iter);
+
++release_mm:
+ mmput(mm);
+ release_task:
+ put_task_struct(task);
+++ /dev/null
-From e1baddf8475b06cc56f4bafecf9a32a124343d9f Mon Sep 17 00:00:00 2001
-From: Zhou Guanghui <zhouguanghui1@huawei.com>
-Date: Fri, 12 Mar 2021 21:08:33 -0800
-Subject: mm/memcg: set memcg when splitting page
-
-From: Zhou Guanghui <zhouguanghui1@huawei.com>
-
-commit e1baddf8475b06cc56f4bafecf9a32a124343d9f upstream.
-
-As described in the split_page() comment, for the non-compound high order
-page, the sub-pages must be freed individually. If the memcg of the first
-page is valid, the tail pages cannot be uncharged when they are freed.
-
-For example, when alloc_pages_exact is used to allocate 1MB of contiguous
-physical memory, 2MB is charged (kmemcg is enabled and __GFP_ACCOUNT is
-set). When make_alloc_exact frees the unused 1MB and free_pages_exact
-frees the requested 1MB, only 4KB (one page) is actually uncharged.
-
-Therefore, the memcg of the tail page needs to be set when splitting a
-page.
-
-Michel:
-
-There are at least two explicit users of __GFP_ACCOUNT with
-alloc_pages_exact added recently. See 7efe8ef274024 ("KVM: arm64:
-Allocate stage-2 pgd pages with GFP_KERNEL_ACCOUNT") and c419621873713
-("KVM: s390: Add memcg accounting to KVM allocations"), so this is not
-just a theoretical issue.
-
-Link: https://lkml.kernel.org/r/20210304074053.65527-3-zhouguanghui1@huawei.com
-Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
-Acked-by: Johannes Weiner <hannes@cmpxchg.org>
-Reviewed-by: Zi Yan <ziy@nvidia.com>
-Reviewed-by: Shakeel Butt <shakeelb@google.com>
-Acked-by: Michal Hocko <mhocko@suse.com>
-Cc: Hanjun Guo <guohanjun@huawei.com>
-Cc: Hugh Dickins <hughd@google.com>
-Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
-Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
-Cc: Nicholas Piggin <npiggin@gmail.com>
-Cc: Rui Xiang <rui.xiang@huawei.com>
-Cc: Tianhong Ding <dingtianhong@huawei.com>
-Cc: Weilong Chen <chenweilong@huawei.com>
-Cc: <stable@vger.kernel.org>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- mm/page_alloc.c | 1 +
- 1 file changed, 1 insertion(+)
-
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -3272,6 +3272,7 @@ void split_page(struct page *page, unsig
- for (i = 1; i < (1 << order); i++)
- set_page_refcounted(page + i);
- split_page_owner(page, 1 << order);
-+ split_page_memcg(page, 1 << order);
- }
- EXPORT_SYMBOL_GPL(split_page);
-
--- /dev/null
+From b6be002bcd1dd1dedb926abf3c90c794eacb77dc Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Mon, 2 Nov 2020 12:53:16 -0800
+Subject: x86/entry: Move nmi entry/exit into common code
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit b6be002bcd1dd1dedb926abf3c90c794eacb77dc upstream.
+
+Lockdep state handling on NMI enter and exit is nothing specific to x86; it
+is no different on other architectures. The extra state type is also
+unnecessary: irqentry_state_t can carry the required information as well.
+
+Move it to common code and extend irqentry_state_t to carry lockdep state.
+
+[ Ira: Make exit_rcu and lockdep a union as they are mutually exclusive
+ between the IRQ and NMI exceptions, and add kernel documentation for
+ struct irqentry_state_t ]
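+
+The resulting usage pattern for an NMI-type entry point is sketched below
+(illustrative only: exc_foo and handle_foo are made-up names; the real
+conversions are in the diff that follows):
+
+  DEFINE_IDTENTRY_RAW(exc_foo)
+  {
+          irqentry_state_t irq_state = irqentry_nmi_enter(regs);
+
+          instrumentation_begin();
+          handle_foo(regs);       /* instrumentable handler body */
+          instrumentation_end();
+
+          irqentry_nmi_exit(regs, irq_state);
+  }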
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20201102205320.1458656-7-ira.weiny@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/entry/common.c | 34 ----------------------------------
+ arch/x86/include/asm/idtentry.h | 3 ---
+ arch/x86/kernel/cpu/mce/core.c | 6 +++---
+ arch/x86/kernel/nmi.c | 6 +++---
+ arch/x86/kernel/traps.c | 13 +++++++------
+ include/linux/entry-common.h | 39 ++++++++++++++++++++++++++++++++++++++-
+ kernel/entry/common.c | 36 ++++++++++++++++++++++++++++++++++++
+ 7 files changed, 87 insertions(+), 50 deletions(-)
+
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -213,40 +213,6 @@ SYSCALL_DEFINE0(ni_syscall)
+ return -ENOSYS;
+ }
+
+-noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+-{
+- bool irq_state = lockdep_hardirqs_enabled();
+-
+- __nmi_enter();
+- lockdep_hardirqs_off(CALLER_ADDR0);
+- lockdep_hardirq_enter();
+- rcu_nmi_enter();
+-
+- instrumentation_begin();
+- trace_hardirqs_off_finish();
+- ftrace_nmi_enter();
+- instrumentation_end();
+-
+- return irq_state;
+-}
+-
+-noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+-{
+- instrumentation_begin();
+- ftrace_nmi_exit();
+- if (restore) {
+- trace_hardirqs_on_prepare();
+- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+- }
+- instrumentation_end();
+-
+- rcu_nmi_exit();
+- lockdep_hardirq_exit();
+- if (restore)
+- lockdep_hardirqs_on(CALLER_ADDR0);
+- __nmi_exit();
+-}
+-
+ #ifdef CONFIG_XEN_PV
+ #ifndef CONFIG_PREEMPTION
+ /*
+--- a/arch/x86/include/asm/idtentry.h
++++ b/arch/x86/include/asm/idtentry.h
+@@ -11,9 +11,6 @@
+
+ #include <asm/irq_stack.h>
+
+-bool idtentry_enter_nmi(struct pt_regs *regs);
+-void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
+-
+ /**
+ * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
+ * No error code pushed by hardware
+--- a/arch/x86/kernel/cpu/mce/core.c
++++ b/arch/x86/kernel/cpu/mce/core.c
+@@ -1986,7 +1986,7 @@ void (*machine_check_vector)(struct pt_r
+
+ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
+ {
+- bool irq_state;
++ irqentry_state_t irq_state;
+
+ WARN_ON_ONCE(user_mode(regs));
+
+@@ -1998,7 +1998,7 @@ static __always_inline void exc_machine_
+ mce_check_crashing_cpu())
+ return;
+
+- irq_state = idtentry_enter_nmi(regs);
++ irq_state = irqentry_nmi_enter(regs);
+ /*
+ * The call targets are marked noinstr, but objtool can't figure
+ * that out because it's an indirect call. Annotate it.
+@@ -2009,7 +2009,7 @@ static __always_inline void exc_machine_
+ if (regs->flags & X86_EFLAGS_IF)
+ trace_hardirqs_on_prepare();
+ instrumentation_end();
+- idtentry_exit_nmi(regs, irq_state);
++ irqentry_nmi_exit(regs, irq_state);
+ }
+
+ static __always_inline void exc_machine_check_user(struct pt_regs *regs)
+--- a/arch/x86/kernel/nmi.c
++++ b/arch/x86/kernel/nmi.c
+@@ -475,7 +475,7 @@ static DEFINE_PER_CPU(unsigned long, nmi
+
+ DEFINE_IDTENTRY_RAW(exc_nmi)
+ {
+- bool irq_state;
++ irqentry_state_t irq_state;
+
+ /*
+ * Re-enable NMIs right here when running as an SEV-ES guest. This might
+@@ -502,14 +502,14 @@ nmi_restart:
+
+ this_cpu_write(nmi_dr7, local_db_save());
+
+- irq_state = idtentry_enter_nmi(regs);
++ irq_state = irqentry_nmi_enter(regs);
+
+ inc_irq_stat(__nmi_count);
+
+ if (!ignore_nmis)
+ default_do_nmi(regs);
+
+- idtentry_exit_nmi(regs, irq_state);
++ irqentry_nmi_exit(regs, irq_state);
+
+ local_db_restore(this_cpu_read(nmi_dr7));
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -406,7 +406,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
+ }
+ #endif
+
+- idtentry_enter_nmi(regs);
++ irqentry_nmi_enter(regs);
+ instrumentation_begin();
+ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
+
+@@ -652,12 +652,13 @@ DEFINE_IDTENTRY_RAW(exc_int3)
+ instrumentation_end();
+ irqentry_exit_to_user_mode(regs);
+ } else {
+- bool irq_state = idtentry_enter_nmi(regs);
++ irqentry_state_t irq_state = irqentry_nmi_enter(regs);
++
+ instrumentation_begin();
+ if (!do_int3(regs))
+ die("int3", regs, 0);
+ instrumentation_end();
+- idtentry_exit_nmi(regs, irq_state);
++ irqentry_nmi_exit(regs, irq_state);
+ }
+ }
+
+@@ -851,7 +852,7 @@ static __always_inline void exc_debug_ke
+ * includes the entry stack is excluded for everything.
+ */
+ unsigned long dr7 = local_db_save();
+- bool irq_state = idtentry_enter_nmi(regs);
++ irqentry_state_t irq_state = irqentry_nmi_enter(regs);
+ instrumentation_begin();
+
+ /*
+@@ -908,7 +909,7 @@ static __always_inline void exc_debug_ke
+ regs->flags &= ~X86_EFLAGS_TF;
+ out:
+ instrumentation_end();
+- idtentry_exit_nmi(regs, irq_state);
++ irqentry_nmi_exit(regs, irq_state);
+
+ local_db_restore(dr7);
+ }
+@@ -926,7 +927,7 @@ static __always_inline void exc_debug_us
+
+ /*
+ * NB: We can't easily clear DR7 here because
+- * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
++ * irqentry_exit_to_user_mode() can invoke ptrace, schedule, access
+ * user memory, etc. This means that a recursive #DB is possible. If
+ * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+ * Since we're not on the IST stack right now, everything will be
+--- a/include/linux/entry-common.h
++++ b/include/linux/entry-common.h
+@@ -341,8 +341,26 @@ void irqentry_enter_from_user_mode(struc
+ void irqentry_exit_to_user_mode(struct pt_regs *regs);
+
+ #ifndef irqentry_state
++/**
++ * struct irqentry_state - Opaque object for exception state storage
++ * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
++ * exit path has to invoke rcu_irq_exit().
++ * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
++ * lockdep state is restored correctly on exit from NMI.
++ *
++ * This opaque object is filled in by the irqentry_*_enter() functions and
++ * must be passed back into the corresponding irqentry_*_exit() functions
++ * when the exception is complete.
++ *
++ * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
++ * and all members private. Descriptions of the members are provided to aid in
++ * the maintenance of the irqentry_*() functions.
++ */
+ typedef struct irqentry_state {
+- bool exit_rcu;
++ union {
++ bool exit_rcu;
++ bool lockdep;
++ };
+ } irqentry_state_t;
+ #endif
+
+@@ -402,4 +420,23 @@ void irqentry_exit_cond_resched(void);
+ */
+ void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
+
++/**
++ * irqentry_nmi_enter - Handle NMI entry
++ * @regs: Pointer to current's pt_regs
++ *
++ * Similar to irqentry_enter() but taking care of the NMI constraints.
++ */
++irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
++
++/**
++ * irqentry_nmi_exit - Handle return from NMI handling
++ * @regs: Pointer to pt_regs (NMI entry regs)
++ * @irq_state: Return value from matching call to irqentry_nmi_enter()
++ *
++ * Last action before returning to the low level assembly code.
++ *
++ * Counterpart to irqentry_nmi_enter().
++ */
++void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
++
+ #endif
+--- a/kernel/entry/common.c
++++ b/kernel/entry/common.c
+@@ -397,3 +397,39 @@ noinstr void irqentry_exit(struct pt_reg
+ rcu_irq_exit();
+ }
+ }
++
++irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
++{
++ irqentry_state_t irq_state;
++
++ irq_state.lockdep = lockdep_hardirqs_enabled();
++
++ __nmi_enter();
++ lockdep_hardirqs_off(CALLER_ADDR0);
++ lockdep_hardirq_enter();
++ rcu_nmi_enter();
++
++ instrumentation_begin();
++ trace_hardirqs_off_finish();
++ ftrace_nmi_enter();
++ instrumentation_end();
++
++ return irq_state;
++}
++
++void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
++{
++ instrumentation_begin();
++ ftrace_nmi_exit();
++ if (irq_state.lockdep) {
++ trace_hardirqs_on_prepare();
++ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++ }
++ instrumentation_end();
++
++ rcu_nmi_exit();
++ lockdep_hardirq_exit();
++ if (irq_state.lockdep)
++ lockdep_hardirqs_on(CALLER_ADDR0);
++ __nmi_exit();
++}