From: Greg Kroah-Hartman
Date: Mon, 15 Mar 2021 11:17:14 +0000 (+0100)
Subject: 5.10-stable patches
X-Git-Tag: v4.4.262~19
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c6112415d3ad832fab1c602999358fd176415a2f;p=thirdparty%2Fkernel%2Fstable-queue.git

5.10-stable patches

added patches:
	mm-madvise-replace-ptrace-attach-requirement-for-process_madvise.patch
	x86-entry-move-nmi-entry-exit-into-common-code.patch
---

diff --git a/queue-5.10/mm-madvise-replace-ptrace-attach-requirement-for-process_madvise.patch b/queue-5.10/mm-madvise-replace-ptrace-attach-requirement-for-process_madvise.patch
new file mode 100644
index 00000000000..b03361a70a9
--- /dev/null
+++ b/queue-5.10/mm-madvise-replace-ptrace-attach-requirement-for-process_madvise.patch
@@ -0,0 +1,82 @@
+From 96cfe2c0fd23ea7c2368d14f769d287e7ae1082e Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan
+Date: Fri, 12 Mar 2021 21:08:06 -0800
+Subject: mm/madvise: replace ptrace attach requirement for process_madvise
+
+From: Suren Baghdasaryan
+
+commit 96cfe2c0fd23ea7c2368d14f769d287e7ae1082e upstream.
+
+process_madvise currently requires ptrace attach capability.
+PTRACE_MODE_ATTACH gives one process complete control over another
+process.  It effectively removes the security boundary between the two
+processes (in one direction).  Granting ptrace attach capability even to a
+system process is considered dangerous since it creates an attack surface.
+This severely limits the usage of this API.
+
+The operations process_madvise can perform do not affect the correctness
+of the operation of the target process; they only affect where the data is
+physically located (and therefore, how fast it can be accessed).  What we
+want is the ability for one process to influence another process in order
+to optimize performance across the entire system while leaving the
+security boundary intact.
+
+Replace PTRACE_MODE_ATTACH with a combination of PTRACE_MODE_READ and
+CAP_SYS_NICE.  PTRACE_MODE_READ to prevent leaking ASLR metadata and
+CAP_SYS_NICE for influencing process performance.
+
+Link: https://lkml.kernel.org/r/20210303185807.2160264-1-surenb@google.com
+Signed-off-by: Suren Baghdasaryan
+Reviewed-by: Kees Cook
+Acked-by: Minchan Kim
+Acked-by: David Rientjes
+Cc: Jann Horn
+Cc: Jeff Vander Stoep
+Cc: Michal Hocko
+Cc: Shakeel Butt
+Cc: Tim Murray
+Cc: Florian Weimer
+Cc: Oleg Nesterov
+Cc: James Morris
+Cc: [5.10+]
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/madvise.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/mm/madvise.c
++++ b/mm/madvise.c
+@@ -1202,12 +1202,22 @@ SYSCALL_DEFINE5(process_madvise, int, pi
+ 		goto release_task;
+ 	}
+
+-	mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
++	/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
++	mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
+ 	if (IS_ERR_OR_NULL(mm)) {
+ 		ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ 		goto release_task;
+ 	}
+
++	/*
++	 * Require CAP_SYS_NICE for influencing process performance. Note that
++	 * only non-destructive hints are currently supported.
++	 */
++	if (!capable(CAP_SYS_NICE)) {
++		ret = -EPERM;
++		goto release_mm;
++	}
++
+ 	total_len = iov_iter_count(&iter);
+
+ 	while (iov_iter_count(&iter)) {
+@@ -1222,6 +1232,7 @@ SYSCALL_DEFINE5(process_madvise, int, pi
+ 	if (ret == 0)
+ 		ret = total_len - iov_iter_count(&iter);
+
++release_mm:
+ 	mmput(mm);
+ release_task:
+ 	put_task_struct(task);
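
[ Note: an illustrative userspace sketch, not part of the patch. It shows
  how a caller is expected to invoke process_madvise() once this change is
  applied: the caller must hold CAP_SYS_NICE and must pass the same class
  of PTRACE_MODE_READ check used for reading a target's /proc/<pid>/maps.
  Raw syscall numbers (x86-64 / asm-generic values) are used because glibc
  had no wrappers for these calls at the time. ]

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/syscall.h>
	#include <sys/uio.h>
	#include <unistd.h>

	#ifndef __NR_pidfd_open
	#define __NR_pidfd_open		434
	#endif
	#ifndef __NR_process_madvise
	#define __NR_process_madvise	440
	#endif
	#ifndef MADV_PAGEOUT
	#define MADV_PAGEOUT		21	/* reclaim these pages */
	#endif

	int main(int argc, char **argv)
	{
		struct iovec iov;
		ssize_t ret;
		int pidfd;

		if (argc != 4) {
			fprintf(stderr, "usage: %s <pid> <addr> <len>\n", argv[0]);
			return 1;
		}

		/* A pidfd names the target process for process_madvise(). */
		pidfd = syscall(__NR_pidfd_open, atol(argv[1]), 0);
		if (pidfd < 0) {
			perror("pidfd_open");
			return 1;
		}

		iov.iov_base = (void *)strtoul(argv[2], NULL, 0);
		iov.iov_len = strtoul(argv[3], NULL, 0);

		/* With this patch applied, fails with EPERM if the caller
		 * lacks CAP_SYS_NICE, even when ptrace read access exists. */
		ret = syscall(__NR_process_madvise, pidfd, &iov, 1,
			      MADV_PAGEOUT, 0);
		if (ret < 0)
			perror("process_madvise");
		else
			printf("advised %zd bytes\n", ret);

		close(pidfd);
		return ret < 0;
	}
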
diff --git a/queue-5.10/mm-memcg-set-memcg-when-splitting-page.patch b/queue-5.10/mm-memcg-set-memcg-when-splitting-page.patch
deleted file mode 100644
index 76d97e05ea9..00000000000
--- a/queue-5.10/mm-memcg-set-memcg-when-splitting-page.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From e1baddf8475b06cc56f4bafecf9a32a124343d9f Mon Sep 17 00:00:00 2001
-From: Zhou Guanghui
-Date: Fri, 12 Mar 2021 21:08:33 -0800
-Subject: mm/memcg: set memcg when splitting page
-
-From: Zhou Guanghui
-
-commit e1baddf8475b06cc56f4bafecf9a32a124343d9f upstream.
-
-As described in the split_page() comment, for the non-compound high order
-page, the sub-pages must be freed individually.  If the memcg of the first
-page is valid, the tail pages cannot be uncharged when they are freed.
-
-For example, when alloc_pages_exact is used to allocate 1MB of continuous
-physical memory, 2MB is charged (kmemcg is enabled and __GFP_ACCOUNT is
-set).  When make_alloc_exact frees the unused 1MB and free_pages_exact
-frees the requested 1MB, only 4KB (one page) is actually uncharged.
-
-Therefore, the memcg of the tail page needs to be set when splitting a
-page.
-
-Michel:
-
-There are at least two explicit users of __GFP_ACCOUNT with
-alloc_pages_exact added recently.  See 7efe8ef274024 ("KVM: arm64:
-Allocate stage-2 pgd pages with GFP_KERNEL_ACCOUNT") and c419621873713
-("KVM: s390: Add memcg accounting to KVM allocations"), so this is not
-just a theoretical issue.
-
-Link: https://lkml.kernel.org/r/20210304074053.65527-3-zhouguanghui1@huawei.com
-Signed-off-by: Zhou Guanghui
-Acked-by: Johannes Weiner
-Reviewed-by: Zi Yan
-Reviewed-by: Shakeel Butt
-Acked-by: Michal Hocko
-Cc: Hanjun Guo
-Cc: Hugh Dickins
-Cc: Kefeng Wang
-Cc: "Kirill A. Shutemov"
-Cc: Nicholas Piggin
-Cc: Rui Xiang
-Cc: Tianhong Ding
-Cc: Weilong Chen
-Cc:
-Signed-off-by: Andrew Morton
-Signed-off-by: Linus Torvalds
-Signed-off-by: Greg Kroah-Hartman
----
- mm/page_alloc.c |    1 +
- 1 file changed, 1 insertion(+)
-
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -3272,6 +3272,7 @@ void split_page(struct page *page, unsig
- 	for (i = 1; i < (1 << order); i++)
- 		set_page_refcounted(page + i);
- 	split_page_owner(page, 1 << order);
-+	split_page_memcg(page, 1 << order);
- }
- EXPORT_SYMBOL_GPL(split_page);
-
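
[ Note: an illustrative kernel-code sketch, not part of the patch. It
  restates the commit message's example: with kmemcg enabled, the whole
  backing allocation is charged up front, but without split_page_memcg()
  only the head page is ever uncharged on free, because split_page() did
  not propagate the memcg to the tail pages. ]

	#include <linux/gfp.h>
	#include <linux/sizes.h>

	static void demo_exact_alloc(void)
	{
		/*
		 * Per the example in the commit message above: the backing
		 * allocation is a 2MB non-compound page, charged in full;
		 * make_alloc_exact() then calls split_page() and frees the
		 * unused half.
		 */
		void *buf = alloc_pages_exact(SZ_1M, GFP_KERNEL | __GFP_ACCOUNT);

		if (!buf)
			return;

		/*
		 * Without the fix, the tail pages carry no memcg, so of the
		 * 2MB charged only 4KB (the head page) is ever uncharged,
		 * here and in the internal free above.
		 */
		free_pages_exact(buf, SZ_1M);
	}
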
diff --git a/queue-5.10/series b/queue-5.10/series
index fec51a87075..8d36a635eba 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -270,6 +270,7 @@ binfmt_misc-fix-possible-deadlock-in-bm_register_write.patch
 x86-unwind-orc-disable-kasan-checking-in-the-orc-unwinder-part-2.patch
 x86-sev-es-introduce-ip_within_syscall_gap-helper.patch
 x86-sev-es-check-regs-sp-is-trusted-before-adjusting-vc-ist-stack.patch
+x86-entry-move-nmi-entry-exit-into-common-code.patch
 x86-sev-es-correctly-track-irq-states-in-runtime-vc-handler.patch
 x86-sev-es-use-__copy_from_user_inatomic.patch
 x86-entry-fix-entry-exit-mismatch-on-failed-fast-32-bit-syscalls.patch
@@ -281,4 +282,4 @@ kvm-arm64-nvhe-save-the-spe-context-early.patch
 kvm-arm64-reject-vm-creation-when-the-default-ipa-size-is-unsupported.patch
 kvm-arm64-fix-exclusive-limit-for-ipa-size.patch
 mm-userfaultfd-fix-memory-corruption-due-to-writeprotect.patch
-mm-memcg-set-memcg-when-splitting-page.patch
+mm-madvise-replace-ptrace-attach-requirement-for-process_madvise.patch

diff --git a/queue-5.10/x86-entry-move-nmi-entry-exit-into-common-code.patch b/queue-5.10/x86-entry-move-nmi-entry-exit-into-common-code.patch
new file mode 100644
index 00000000000..a54f920f162
--- /dev/null
+++ b/queue-5.10/x86-entry-move-nmi-entry-exit-into-common-code.patch
@@ -0,0 +1,296 @@
+From b6be002bcd1dd1dedb926abf3c90c794eacb77dc Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Mon, 2 Nov 2020 12:53:16 -0800
+Subject: x86/entry: Move nmi entry/exit into common code
+
+From: Thomas Gleixner
+
+commit b6be002bcd1dd1dedb926abf3c90c794eacb77dc upstream.
+
+Lockdep state handling on NMI enter and exit is nothing specific to X86. It's
+not any different on other architectures. Also the extra state type is not
+necessary, irqentry_state_t can carry the necessary information as well.
+
+Move it to common code and extend irqentry_state_t to carry lockdep state.
+
+[ Ira: Make exit_rcu and lockdep a union as they are mutually exclusive
+  between the IRQ and NMI exceptions, and add kernel documentation for
+  struct irqentry_state_t ]
+
+Signed-off-by: Thomas Gleixner
+Signed-off-by: Ira Weiny
+Signed-off-by: Thomas Gleixner
+Link: https://lore.kernel.org/r/20201102205320.1458656-7-ira.weiny@intel.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/entry/common.c         |   34 ----------------------------------
+ arch/x86/include/asm/idtentry.h |    3 ---
+ arch/x86/kernel/cpu/mce/core.c  |    6 +++---
+ arch/x86/kernel/nmi.c           |    6 +++---
+ arch/x86/kernel/traps.c         |   13 +++++++------
+ include/linux/entry-common.h    |   39 ++++++++++++++++++++++++++++++++++++++-
+ kernel/entry/common.c           |   36 ++++++++++++++++++++++++++++++++++++
+ 7 files changed, 87 insertions(+), 50 deletions(-)
+
+--- a/arch/x86/entry/common.c
++++ b/arch/x86/entry/common.c
+@@ -213,40 +213,6 @@ SYSCALL_DEFINE0(ni_syscall)
+ 	return -ENOSYS;
+ }
+
+-noinstr bool idtentry_enter_nmi(struct pt_regs *regs)
+-{
+-	bool irq_state = lockdep_hardirqs_enabled();
+-
+-	__nmi_enter();
+-	lockdep_hardirqs_off(CALLER_ADDR0);
+-	lockdep_hardirq_enter();
+-	rcu_nmi_enter();
+-
+-	instrumentation_begin();
+-	trace_hardirqs_off_finish();
+-	ftrace_nmi_enter();
+-	instrumentation_end();
+-
+-	return irq_state;
+-}
+-
+-noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore)
+-{
+-	instrumentation_begin();
+-	ftrace_nmi_exit();
+-	if (restore) {
+-		trace_hardirqs_on_prepare();
+-		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+-	}
+-	instrumentation_end();
+-
+-	rcu_nmi_exit();
+-	lockdep_hardirq_exit();
+-	if (restore)
+-		lockdep_hardirqs_on(CALLER_ADDR0);
+-	__nmi_exit();
+-}
+-
+ #ifdef CONFIG_XEN_PV
+ #ifndef CONFIG_PREEMPTION
+ /*
+--- a/arch/x86/include/asm/idtentry.h
++++ b/arch/x86/include/asm/idtentry.h
+@@ -11,9 +11,6 @@
+
+ #include
+
+-bool idtentry_enter_nmi(struct pt_regs *regs);
+-void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state);
+-
+ /**
+  * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
+  *		      No error code pushed by hardware
+--- a/arch/x86/kernel/cpu/mce/core.c
++++ b/arch/x86/kernel/cpu/mce/core.c
+@@ -1986,7 +1986,7 @@ void (*machine_check_vector)(struct pt_r
+
+ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
+ {
+-	bool irq_state;
++	irqentry_state_t irq_state;
+
+ 	WARN_ON_ONCE(user_mode(regs));
+
+@@ -1998,7 +1998,7 @@ static __always_inline void exc_machine_
+ 	    mce_check_crashing_cpu())
+ 		return;
+
+-	irq_state = idtentry_enter_nmi(regs);
++	irq_state = irqentry_nmi_enter(regs);
+ 	/*
+ 	 * The call targets are marked noinstr, but objtool can't figure
+ 	 * that out because it's an indirect call. Annotate it.
+@@ -2009,7 +2009,7 @@ static __always_inline void exc_machine_
+ 	if (regs->flags & X86_EFLAGS_IF)
+ 		trace_hardirqs_on_prepare();
+ 	instrumentation_end();
+-	idtentry_exit_nmi(regs, irq_state);
++	irqentry_nmi_exit(regs, irq_state);
+ }
+
+ static __always_inline void exc_machine_check_user(struct pt_regs *regs)
+--- a/arch/x86/kernel/nmi.c
++++ b/arch/x86/kernel/nmi.c
+@@ -475,7 +475,7 @@ static DEFINE_PER_CPU(unsigned long, nmi
+
+ DEFINE_IDTENTRY_RAW(exc_nmi)
+ {
+-	bool irq_state;
++	irqentry_state_t irq_state;
+
+ 	/*
+ 	 * Re-enable NMIs right here when running as an SEV-ES guest. This might
+@@ -502,14 +502,14 @@ nmi_restart:
+
+ 	this_cpu_write(nmi_dr7, local_db_save());
+
+-	irq_state = idtentry_enter_nmi(regs);
++	irq_state = irqentry_nmi_enter(regs);
+
+ 	inc_irq_stat(__nmi_count);
+
+ 	if (!ignore_nmis)
+ 		default_do_nmi(regs);
+
+-	idtentry_exit_nmi(regs, irq_state);
++	irqentry_nmi_exit(regs, irq_state);
+
+ 	local_db_restore(this_cpu_read(nmi_dr7));
+
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -406,7 +406,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
+ 	}
+ #endif
+
+-	idtentry_enter_nmi(regs);
++	irqentry_nmi_enter(regs);
+ 	instrumentation_begin();
+ 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
+
+@@ -652,12 +652,13 @@ DEFINE_IDTENTRY_RAW(exc_int3)
+ 		instrumentation_end();
+ 		irqentry_exit_to_user_mode(regs);
+ 	} else {
+-		bool irq_state = idtentry_enter_nmi(regs);
++		irqentry_state_t irq_state = irqentry_nmi_enter(regs);
++
+ 		instrumentation_begin();
+ 		if (!do_int3(regs))
+ 			die("int3", regs, 0);
+ 		instrumentation_end();
+-		idtentry_exit_nmi(regs, irq_state);
++		irqentry_nmi_exit(regs, irq_state);
+ 	}
+ }
+
+@@ -851,7 +852,7 @@ static __always_inline void exc_debug_ke
+ 	 * includes the entry stack is excluded for everything.
+ 	 */
+ 	unsigned long dr7 = local_db_save();
+-	bool irq_state = idtentry_enter_nmi(regs);
++	irqentry_state_t irq_state = irqentry_nmi_enter(regs);
+ 	instrumentation_begin();
+
+ 	/*
+@@ -908,7 +909,7 @@ static __always_inline void exc_debug_ke
+ 	regs->flags &= ~X86_EFLAGS_TF;
+ out:
+ 	instrumentation_end();
+-	idtentry_exit_nmi(regs, irq_state);
++	irqentry_nmi_exit(regs, irq_state);
+
+ 	local_db_restore(dr7);
+ }
+@@ -926,7 +927,7 @@ static __always_inline void exc_debug_us
+
+ 	/*
+ 	 * NB: We can't easily clear DR7 here because
+-	 * idtentry_exit_to_usermode() can invoke ptrace, schedule, access
++	 * irqentry_exit_to_usermode() can invoke ptrace, schedule, access
+ 	 * user memory, etc.  This means that a recursive #DB is possible.  If
+ 	 * this happens, that #DB will hit exc_debug_kernel() and clear DR7.
+ 	 * Since we're not on the IST stack right now, everything will be
+--- a/include/linux/entry-common.h
++++ b/include/linux/entry-common.h
+@@ -341,8 +341,26 @@ void irqentry_enter_from_user_mode(struc
+ void irqentry_exit_to_user_mode(struct pt_regs *regs);
+
+ #ifndef irqentry_state
++/**
++ * struct irqentry_state - Opaque object for exception state storage
++ * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
++ *            exit path has to invoke rcu_irq_exit().
++ * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
++ *           lockdep state is restored correctly on exit from nmi.
++ *
++ * This opaque object is filled in by the irqentry_*_enter() functions and
++ * must be passed back into the corresponding irqentry_*_exit() functions
++ * when the exception is complete.
++ *
++ * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
++ * and all members private.  Descriptions of the members are provided to aid in
++ * the maintenance of the irqentry_*() functions.
++ */
+ typedef struct irqentry_state {
+-	bool	exit_rcu;
++	union {
++		bool	exit_rcu;
++		bool	lockdep;
++	};
+ } irqentry_state_t;
+ #endif
+
+@@ -402,4 +420,23 @@ void irqentry_exit_cond_resched(void);
+  */
+ void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
+
++/**
++ * irqentry_nmi_enter - Handle NMI entry
++ * @regs:	Pointer to current's pt_regs
++ *
++ * Similar to irqentry_enter() but taking care of the NMI constraints.
++ */
++irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
++
++/**
++ * irqentry_nmi_exit - Handle return from NMI handling
++ * @regs:	Pointer to pt_regs (NMI entry regs)
++ * @irq_state:	Return value from matching call to irqentry_nmi_enter()
++ *
++ * Last action before returning to the low level assembly code.
++ *
++ * Counterpart to irqentry_nmi_enter().
++ */
++void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
++
+ #endif
+--- a/kernel/entry/common.c
++++ b/kernel/entry/common.c
+@@ -397,3 +397,39 @@ noinstr void irqentry_exit(struct pt_reg
+ 		rcu_irq_exit();
+ 	}
+ }
++
++irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
++{
++	irqentry_state_t irq_state;
++
++	irq_state.lockdep = lockdep_hardirqs_enabled();
++
++	__nmi_enter();
++	lockdep_hardirqs_off(CALLER_ADDR0);
++	lockdep_hardirq_enter();
++	rcu_nmi_enter();
++
++	instrumentation_begin();
++	trace_hardirqs_off_finish();
++	ftrace_nmi_enter();
++	instrumentation_end();
++
++	return irq_state;
++}
++
++void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
++{
++	instrumentation_begin();
++	ftrace_nmi_exit();
++	if (irq_state.lockdep) {
++		trace_hardirqs_on_prepare();
++		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
++	}
++	instrumentation_end();
++
++	rcu_nmi_exit();
++	lockdep_hardirq_exit();
++	if (irq_state.lockdep)
++		lockdep_hardirqs_on(CALLER_ADDR0);
++	__nmi_exit();
++}
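
[ Note: an illustrative sketch, not part of the patch. It shows the
  calling convention the new generic helpers establish for an NMI-like
  exception handler on any architecture, mirroring the x86 conversions
  above; arch_handle_nmi() and do_arch_nmi_work() are hypothetical names. ]

	#include <linux/entry-common.h>
	#include <linux/instrumentation.h>

	/* Hypothetical arch-specific NMI body. */
	static void do_arch_nmi_work(struct pt_regs *regs) { }

	noinstr void arch_handle_nmi(struct pt_regs *regs)
	{
		/*
		 * Saves the lockdep state into irq_state.lockdep and enters
		 * NMI context (__nmi_enter(), rcu_nmi_enter(), ...).
		 */
		irqentry_state_t irq_state = irqentry_nmi_enter(regs);

		/* Anything instrumentable must run between these markers. */
		instrumentation_begin();
		do_arch_nmi_work(regs);
		instrumentation_end();

		/* Restores the lockdep/RCU state saved at entry. */
		irqentry_nmi_exit(regs, irq_state);
	}

Because an exception is handled either as a regular irqentry or as an NMI
entry, never both at once, exit_rcu and lockdep can safely share storage in
the irqentry_state_t union.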