From: Greg Kroah-Hartman Date: Mon, 6 Jun 2022 09:43:30 +0000 (+0200) Subject: 5.17-stable patches X-Git-Tag: v5.10.121~151 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8b927cc7c365e3ed439250e8c0b554100a32acac;p=thirdparty%2Fkernel%2Fstable-queue.git 5.17-stable patches added patches: kthread-don-t-allocate-kthread_struct-for-init-and-umh.patch perf-x86-intel-fix-event-constraints-for-icl.patch platform-x86-intel-hid-fix-_dsm-function-index-handling.patch ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch x86-kexec-fix-memory-leak-of-elf-header-buffer.patch x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch --- diff --git a/queue-5.17/kthread-don-t-allocate-kthread_struct-for-init-and-umh.patch b/queue-5.17/kthread-don-t-allocate-kthread_struct-for-init-and-umh.patch new file mode 100644 index 00000000000..29fd0a1ac0e --- /dev/null +++ b/queue-5.17/kthread-don-t-allocate-kthread_struct-for-init-and-umh.patch @@ -0,0 +1,181 @@ +From 343f4c49f2438d8920f1f76fa823ee59b91f02e4 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Mon, 11 Apr 2022 11:40:14 -0500 +Subject: kthread: Don't allocate kthread_struct for init and umh +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Eric W. Biederman + +commit 343f4c49f2438d8920f1f76fa823ee59b91f02e4 upstream. + +If kthread_is_per_cpu runs concurrently with free_kthread_struct, the +kthread_struct that was just freed may be read from. + +This bug was introduced by commit 40966e316f86 ("kthread: Ensure +struct kthread is present for all kthreads"), when kthread_struct +started to be allocated for all tasks that have PF_KTHREAD set. This +in turn required the kthread_struct to be freed in kernel_execve and +violated the assumption that kthread_struct will have the same +lifetime as the task. + +Looking a bit deeper, this only applies to callers of kernel_execve, +which is just the init process and the user mode helper processes. +These processes really don't want to be kernel threads but are for +historical reasons. Mostly because copy_thread does not know how to pass +a kernel mode function to a process without +PF_KTHREAD or PF_IO_WORKER set. + +Solve this by not allocating kthread_struct for the init process and +the user mode helper processes. + +This is done by adding a kthread member to struct kernel_clone_args, +setting kthread in fork_idle and kernel_thread, adding +user_mode_thread that works like kernel_thread except it does not set +kthread, and in fork only allocating the kthread_struct if .kthread is +set. + +I have looked at kernel/kthread.c and since commit 40966e316f86 +("kthread: Ensure struct kthread is present for all kthreads") there +have been no assumptions added that to_kthread or __to_kthread will +not return NULL. + +There are a few callers of to_kthread or __to_kthread that assume a +non-NULL struct kthread pointer will be returned. These functions are +kthread_data(), kthread_parkme(), kthread_exit(), kthread(), +kthread_park(), kthread_unpark(), kthread_stop(). All of those functions +can reasonably be expected to be called when it is known that a task is a +kthread, so that assumption seems reasonable.
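To make the new rule concrete, here is a minimal stand-alone C model of the decision (illustrative only, not kernel code; the names merely echo the patch): whether struct kthread gets allocated is now tied to kernel_clone_args.kthread rather than to PF_KTHREAD in the task flags, so init and the user mode helpers, which keep PF_KTHREAD until exec clears it, never get one and kernel_execve has nothing to free.

#include <stdbool.h>
#include <stdio.h>

#define PF_KTHREAD 0x00200000 /* same flag value the kernel uses */

struct clone_args_model {
	unsigned int flags; /* task flags, e.g. PF_KTHREAD */
	bool kthread;       /* mirrors kernel_clone_args.kthread */
};

/* Old rule: allocate struct kthread whenever PF_KTHREAD is set. */
static bool old_rule(const struct clone_args_model *a)
{
	return a->flags & PF_KTHREAD;
}

/* New rule: allocate only when the caller explicitly asked for a kthread. */
static bool new_rule(const struct clone_args_model *a)
{
	return a->kthread;
}

int main(void)
{
	/* init/UMH: carry PF_KTHREAD until exec, but .kthread stays 0 */
	struct clone_args_model umh = { .flags = PF_KTHREAD, .kthread = false };
	/* a real kernel thread: .kthread = 1 */
	struct clone_args_model kthr = { .flags = PF_KTHREAD, .kthread = true };

	printf("umh:     old=%d new=%d\n", old_rule(&umh), new_rule(&umh));
	printf("kthread: old=%d new=%d\n", old_rule(&kthr), new_rule(&kthr));
	return 0;
}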
+ +Cc: stable@vger.kernel.org +Fixes: 40966e316f86 ("kthread: Ensure struct kthread is present for all kthreads") +Reported-by: Максим Кутявин +Link: https://lkml.kernel.org/r/20220506141512.516114-1-ebiederm@xmission.com +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + fs/exec.c | 6 ++++-- + include/linux/sched/task.h | 2 ++ + init/main.c | 2 +- + kernel/fork.c | 22 ++++++++++++++++++++-- + kernel/umh.c | 6 +++--- + 5 files changed, 30 insertions(+), 8 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1315,8 +1315,6 @@ int begin_new_exec(struct linux_binprm * + */ + force_uaccess_begin(); + +- if (me->flags & PF_KTHREAD) +- free_kthread_struct(me); + me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | + PF_NOFREEZE | PF_NO_SETAFFINITY); + flush_thread(); +@@ -1962,6 +1960,10 @@ int kernel_execve(const char *kernel_fil + int fd = AT_FDCWD; + int retval; + ++ if (WARN_ON_ONCE((current->flags & PF_KTHREAD) && ++ (current->worker_private))) ++ return -EINVAL; ++ + filename = getname_kernel(kernel_filename); + if (IS_ERR(filename)) + return PTR_ERR(filename); +--- a/include/linux/sched/task.h ++++ b/include/linux/sched/task.h +@@ -32,6 +32,7 @@ struct kernel_clone_args { + size_t set_tid_size; + int cgroup; + int io_thread; ++ int kthread; + struct cgroup *cgrp; + struct css_set *cset; + }; +@@ -89,6 +90,7 @@ struct task_struct *create_io_thread(int + struct task_struct *fork_idle(int); + struct mm_struct *copy_init_mm(void); + extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); ++extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags); + extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); + int kernel_wait(pid_t pid, int *stat); + +--- a/init/main.c ++++ b/init/main.c +@@ -688,7 +688,7 @@ noinline void __ref rest_init(void) + * the init task will end up wanting to create kthreads, which, if + * we schedule it before we create kthreadd, will OOPS. + */ +- pid = kernel_thread(kernel_init, NULL, CLONE_FS); ++ pid = user_mode_thread(kernel_init, NULL, CLONE_FS); + /* + * Pin init on the boot CPU. Task migration is not properly working + * until sched_init_smp() has been run. It will set the allowed +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -2087,7 +2087,7 @@ static __latent_entropy struct task_stru + p->io_context = NULL; + audit_set_context(p, NULL); + cgroup_fork(p); +- if (p->flags & PF_KTHREAD) { ++ if (args->kthread) { + if (!set_kthread_struct(p)) + goto bad_fork_cleanup_delayacct; + } +@@ -2474,7 +2474,8 @@ struct task_struct * __init fork_idle(in + { + struct task_struct *task; + struct kernel_clone_args args = { +- .flags = CLONE_VM, ++ .flags = CLONE_VM, ++ .kthread = 1, + }; + + task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args); +@@ -2608,6 +2609,23 @@ pid_t kernel_thread(int (*fn)(void *), v + { + struct kernel_clone_args args = { + .flags = ((lower_32_bits(flags) | CLONE_VM | ++ CLONE_UNTRACED) & ~CSIGNAL), ++ .exit_signal = (lower_32_bits(flags) & CSIGNAL), ++ .stack = (unsigned long)fn, ++ .stack_size = (unsigned long)arg, ++ .kthread = 1, ++ }; ++ ++ return kernel_clone(&args); ++} ++ ++/* ++ * Create a user mode thread. 
++ */ ++pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags) ++{ ++ struct kernel_clone_args args = { ++ .flags = ((lower_32_bits(flags) | CLONE_VM | + CLONE_UNTRACED) & ~CSIGNAL), + .exit_signal = (lower_32_bits(flags) & CSIGNAL), + .stack = (unsigned long)fn, +--- a/kernel/umh.c ++++ b/kernel/umh.c +@@ -132,7 +132,7 @@ static void call_usermodehelper_exec_syn + + /* If SIGCLD is ignored do_wait won't populate the status. */ + kernel_sigaction(SIGCHLD, SIG_DFL); +- pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); ++ pid = user_mode_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); + if (pid < 0) + sub_info->retval = pid; + else +@@ -171,8 +171,8 @@ static void call_usermodehelper_exec_wor + * want to pollute current->children, and we need a parent + * that always ignores SIGCHLD to ensure auto-reaping. + */ +- pid = kernel_thread(call_usermodehelper_exec_async, sub_info, +- CLONE_PARENT | SIGCHLD); ++ pid = user_mode_thread(call_usermodehelper_exec_async, sub_info, ++ CLONE_PARENT | SIGCHLD); + if (pid < 0) { + sub_info->retval = pid; + umh_complete(sub_info); diff --git a/queue-5.17/perf-x86-intel-fix-event-constraints-for-icl.patch b/queue-5.17/perf-x86-intel-fix-event-constraints-for-icl.patch new file mode 100644 index 00000000000..d0bc4725a12 --- /dev/null +++ b/queue-5.17/perf-x86-intel-fix-event-constraints-for-icl.patch @@ -0,0 +1,35 @@ +From 86dca369075b3e310c3c0adb0f81e513c562b5e4 Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Wed, 25 May 2022 06:39:52 -0700 +Subject: perf/x86/intel: Fix event constraints for ICL + +From: Kan Liang + +commit 86dca369075b3e310c3c0adb0f81e513c562b5e4 upstream. + +According to the latest event list, the event encodings 0x55 +INST_DECODED.DECODERS and 0x56 UOPS_DECODED.DEC0 are only available on +the first 4 counters. Add them into the event constraints table.
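For readers who do not stare at these tables often: the last argument of INTEL_EVENT_CONSTRAINT_RANGE() is a counter bitmask in which bit n allows the event on general-purpose counter n, so the 0xf in the hunk below is exactly "counters 0-3". A small stand-alone sketch (illustrative, not perf code) of how such a mask reads:

#include <stdio.h>

/* Decode a constraint counter mask: bit n set => counter n is allowed. */
static void print_allowed_counters(unsigned int event, unsigned int cntmask)
{
	printf("event 0x%02x allowed on counters:", event);
	for (unsigned int i = 0; i < 8; i++)
		if (cntmask & (1u << i))
			printf(" %u", i);
	printf("\n");
}

int main(void)
{
	print_allowed_counters(0x55, 0xf); /* INST_DECODED.DECODERS */
	print_allowed_counters(0x56, 0xf); /* UOPS_DECODED.DEC0 */
	return 0;
}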
+ +Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support") +Signed-off-by: Kan Liang +Signed-off-by: Ingo Molnar +Acked-by: Peter Zijlstra +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220525133952.1660658-1-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -255,7 +255,7 @@ static struct event_constraint intel_icl + INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), + INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ +- INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), ++ INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ + INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ diff --git a/queue-5.17/platform-x86-intel-hid-fix-_dsm-function-index-handling.patch b/queue-5.17/platform-x86-intel-hid-fix-_dsm-function-index-handling.patch new file mode 100644 index 00000000000..a969519224e --- /dev/null +++ b/queue-5.17/platform-x86-intel-hid-fix-_dsm-function-index-handling.patch @@ -0,0 +1,39 @@ +From 1620c80bba53af8c547bab34a1d3bc58319fe608 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= +Date: Tue, 17 May 2022 20:31:30 +0200 +Subject: platform/x86: intel-hid: fix _DSM function index handling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michael Niewöhner + +commit 1620c80bba53af8c547bab34a1d3bc58319fe608 upstream. + +intel_hid_dsm_fn_mask is a bit mask containing one bit for each function +index. Fix the function index check in intel_hid_evaluate_method +accordingly, which was missed in commit 97ab4516205e ("platform/x86: +intel-hid: fix _DSM function index handling"). + +Fixes: 97ab4516205e ("platform/x86: intel-hid: fix _DSM function index handling") +Cc: stable@vger.kernel.org +Signed-off-by: Michael Niewöhner +Link: https://lore.kernel.org/r/66f813f5bcc724a0f6dd5adefe6a9728dbe509e3.camel@mniewoehner.de +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/x86/intel/hid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/platform/x86/intel/hid.c ++++ b/drivers/platform/x86/intel/hid.c +@@ -238,7 +238,7 @@ static bool intel_hid_evaluate_method(ac + + method_name = (char *)intel_hid_dsm_fn_to_method[fn_index]; + +- if (!(intel_hid_dsm_fn_mask & fn_index)) ++ if (!(intel_hid_dsm_fn_mask & BIT(fn_index))) + goto skip_dsm_eval; + + obj = acpi_evaluate_dsm_typed(handle, &intel_dsm_guid, diff --git a/queue-5.17/ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch b/queue-5.17/ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch new file mode 100644 index 00000000000..396b49844a0 --- /dev/null +++ b/queue-5.17/ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch @@ -0,0 +1,71 @@ +From 6a2d90ba027adba528509ffa27097cffd3879257 Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Fri, 29 Apr 2022 09:23:55 -0500 +Subject: ptrace: Reimplement PTRACE_KILL by always sending SIGKILL + +From: Eric W. Biederman + +commit 6a2d90ba027adba528509ffa27097cffd3879257 upstream. + +The current implementation of PTRACE_KILL is buggy and has been for +many years as it assumes its target has stopped in ptrace_stop. At a +quick skim it looks like this assumption has existed since ptrace +support was added in Linux v1.0.
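The user-visible difference is easy to probe from user space. Below is a hedged sketch (not part of the patch; error handling omitted): the child never enters ptrace_stop, so on kernels without this change the PTRACE_KILL request is a no-op and the waitpid() blocks forever, while with it the child dies just as it would from kill(pid, SIGKILL).

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		for (;;)	/* running/sleeping, never in ptrace_stop */
			pause();
	}

	sleep(1);	/* crude: let the child reach pause() */

	/* Old behavior: the target is not stopped in ptrace_stop, so this
	 * only set fields that nothing ever read. New behavior: SIGKILL. */
	ptrace(PTRACE_KILL, pid, NULL, NULL);

	int status;
	waitpid(pid, &status, 0);
	printf("child %s\n", WIFSIGNALED(status) ? "was killed" : "exited");
	return 0;
}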
+ +While PTRACE_KILL has been deprecated, we cannot remove it, as +a quick search with Google Code Search reveals many existing +programs calling it. + +When the ptracee is not stopped at ptrace_stop, some fields would be +set that are ignored except in ptrace_stop, making the userspace +visible behavior of PTRACE_KILL a no-op in those cases. + +As the usual rules are not obeyed, it is not clear what the +consequences are of calling PTRACE_KILL on a running process. +Presumably userspace does not do this, as it achieves nothing. + +Replace the implementation of PTRACE_KILL with a simple +send_sig_info(SIGKILL) followed by a return 0. This changes the +observable user space behavior only in that PTRACE_KILL on a process +not stopped in ptrace_stop will also kill it. As that has always +been the intent of the code, this seems like a reasonable change. + +Cc: stable@vger.kernel.org +Reported-by: Al Viro +Suggested-by: Al Viro +Tested-by: Kees Cook +Reviewed-by: Oleg Nesterov +Link: https://lkml.kernel.org/r/20220505182645.497868-7-ebiederm@xmission.com +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/step.c | 3 +-- + kernel/ptrace.c | 5 ++--- + 2 files changed, 3 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/step.c ++++ b/arch/x86/kernel/step.c +@@ -180,8 +180,7 @@ void set_task_blockstep(struct task_stru + * + * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if + * task is current or it can't be running, otherwise we can race +- * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but +- * PTRACE_KILL is not safe. ++ * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). + */ + local_irq_disable(); + debugctl = get_debugctlmsr(); +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -1236,9 +1236,8 @@ int ptrace_request(struct task_struct *c + return ptrace_resume(child, request, data); + + case PTRACE_KILL: +- if (child->exit_state) /* already dead */ +- return 0; +- return ptrace_resume(child, request, SIGKILL); ++ send_sig_info(SIGKILL, SEND_SIG_NOINFO, child); ++ return 0; + + #ifdef CONFIG_HAVE_ARCH_TRACEHOOK + case PTRACE_GETREGSET: diff --git a/queue-5.17/ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch b/queue-5.17/ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch new file mode 100644 index 00000000000..bc5c3baa68d --- /dev/null +++ b/queue-5.17/ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch @@ -0,0 +1,140 @@ +From c200e4bb44e80b343c09841e7caaaca0aac5e5fa Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Tue, 26 Apr 2022 16:30:17 -0500 +Subject: ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP + +From: Eric W. Biederman + +commit c200e4bb44e80b343c09841e7caaaca0aac5e5fa upstream. + +User Mode Linux is the last user of the PT_DTRACE flag. Using the flag to indicate +single stepping is a little confusing and, worse, changing tsk->ptrace without locking +could potentially cause problems. + +So use a thread info flag with a better name instead of a flag in tsk->ptrace. + +Remove the definition of PT_DTRACE as uml is the last user. + +Cc: stable@vger.kernel.org +Acked-by: Johannes Berg +Tested-by: Kees Cook +Reviewed-by: Oleg Nesterov +Link: https://lkml.kernel.org/r/20220505182645.497868-3-ebiederm@xmission.com +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + arch/um/include/asm/thread_info.h | 2 ++ + arch/um/kernel/exec.c | 2 +- + arch/um/kernel/process.c | 2 +- + arch/um/kernel/ptrace.c | 8 ++++---- + arch/um/kernel/signal.c | 4 ++-- + include/linux/ptrace.h | 1 - + 6 files changed, 10 insertions(+), 9 deletions(-) + +--- a/arch/um/include/asm/thread_info.h ++++ b/arch/um/include/asm/thread_info.h +@@ -60,6 +60,7 @@ static inline struct thread_info *curren + #define TIF_RESTORE_SIGMASK 7 + #define TIF_NOTIFY_RESUME 8 + #define TIF_SECCOMP 9 /* secure computing */ ++#define TIF_SINGLESTEP 10 /* single stepping userspace */ + + #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +@@ -68,5 +69,6 @@ static inline struct thread_info *curren + #define _TIF_MEMDIE (1 << TIF_MEMDIE) + #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) ++#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) + + #endif +--- a/arch/um/kernel/exec.c ++++ b/arch/um/kernel/exec.c +@@ -43,7 +43,7 @@ void start_thread(struct pt_regs *regs, + { + PT_REGS_IP(regs) = eip; + PT_REGS_SP(regs) = esp; +- current->ptrace &= ~PT_DTRACE; ++ clear_thread_flag(TIF_SINGLESTEP); + #ifdef SUBARCH_EXECVE1 + SUBARCH_EXECVE1(regs->regs); + #endif +--- a/arch/um/kernel/process.c ++++ b/arch/um/kernel/process.c +@@ -335,7 +335,7 @@ int singlestepping(void * t) + { + struct task_struct *task = t ? t : current; + +- if (!(task->ptrace & PT_DTRACE)) ++ if (!test_thread_flag(TIF_SINGLESTEP)) + return 0; + + if (task->thread.singlestep_syscall) +--- a/arch/um/kernel/ptrace.c ++++ b/arch/um/kernel/ptrace.c +@@ -12,7 +12,7 @@ + + void user_enable_single_step(struct task_struct *child) + { +- child->ptrace |= PT_DTRACE; ++ set_tsk_thread_flag(child, TIF_SINGLESTEP); + child->thread.singlestep_syscall = 0; + + #ifdef SUBARCH_SET_SINGLESTEPPING +@@ -22,7 +22,7 @@ void user_enable_single_step(struct task + + void user_disable_single_step(struct task_struct *child) + { +- child->ptrace &= ~PT_DTRACE; ++ clear_tsk_thread_flag(child, TIF_SINGLESTEP); + child->thread.singlestep_syscall = 0; + + #ifdef SUBARCH_SET_SINGLESTEPPING +@@ -121,7 +121,7 @@ static void send_sigtrap(struct uml_pt_r + } + + /* +- * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and ++ * XXX Check TIF_SINGLESTEP for singlestepping check and + * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check + */ + int syscall_trace_enter(struct pt_regs *regs) +@@ -145,7 +145,7 @@ void syscall_trace_leave(struct pt_regs + audit_syscall_exit(regs); + + /* Fake a debug trap */ +- if (ptraced & PT_DTRACE) ++ if (test_thread_flag(TIF_SINGLESTEP)) + send_sigtrap(&regs->regs, 0); + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) +--- a/arch/um/kernel/signal.c ++++ b/arch/um/kernel/signal.c +@@ -53,7 +53,7 @@ static void handle_signal(struct ksignal + unsigned long sp; + int err; + +- if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) ++ if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) + singlestep = 1; + + /* Did we come from a system call? */ +@@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs) + * on the host. The tracing thread will check this flag and + * PTRACE_SYSCALL if necessary.
+ */ +- if (current->ptrace & PT_DTRACE) ++ if (test_thread_flag(TIF_SINGLESTEP)) + current->thread.singlestep_syscall = + is_syscall(PT_REGS_IP(¤t->thread.regs)); + +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_ + + #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ + #define PT_PTRACED 0x00000001 +-#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ + + #define PT_OPT_FLAG_SHIFT 3 + /* PT_TRACE_* event enable flags */ diff --git a/queue-5.17/ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch b/queue-5.17/ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch new file mode 100644 index 00000000000..412ca74a1f1 --- /dev/null +++ b/queue-5.17/ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch @@ -0,0 +1,83 @@ +From 4a3d2717d140401df7501a95e454180831a0c5af Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Tue, 26 Apr 2022 16:45:37 -0500 +Subject: ptrace/xtensa: Replace PT_SINGLESTEP with TIF_SINGLESTEP + +From: Eric W. Biederman + +commit 4a3d2717d140401df7501a95e454180831a0c5af upstream. + +xtensa is the last user of the PT_SINGLESTEP flag. Changing tsk->ptrace in +user_enable_single_step and user_disable_single_step without locking could +potentiallly cause problems. + +So use a thread info flag instead of a flag in tsk->ptrace. Use TIF_SINGLESTEP +that xtensa already had defined but unused. + +Remove the definitions of PT_SINGLESTEP and PT_BLOCKSTEP as they have no more users. + +Cc: stable@vger.kernel.org +Acked-by: Max Filippov +Tested-by: Kees Cook +Reviewed-by: Oleg Nesterov +Link: https://lkml.kernel.org/r/20220505182645.497868-4-ebiederm@xmission.com +Signed-off-by: "Eric W. Biederman" +Signed-off-by: Greg Kroah-Hartman +--- + arch/xtensa/kernel/ptrace.c | 4 ++-- + arch/xtensa/kernel/signal.c | 4 ++-- + include/linux/ptrace.h | 6 ------ + 3 files changed, 4 insertions(+), 10 deletions(-) + +--- a/arch/xtensa/kernel/ptrace.c ++++ b/arch/xtensa/kernel/ptrace.c +@@ -226,12 +226,12 @@ const struct user_regset_view *task_user + + void user_enable_single_step(struct task_struct *child) + { +- child->ptrace |= PT_SINGLESTEP; ++ set_tsk_thread_flag(child, TIF_SINGLESTEP); + } + + void user_disable_single_step(struct task_struct *child) + { +- child->ptrace &= ~PT_SINGLESTEP; ++ clear_tsk_thread_flag(child, TIF_SINGLESTEP); + } + + /* +--- a/arch/xtensa/kernel/signal.c ++++ b/arch/xtensa/kernel/signal.c +@@ -473,7 +473,7 @@ static void do_signal(struct pt_regs *re + /* Set up the stack frame */ + ret = setup_frame(&ksig, sigmask_to_save(), regs); + signal_setup_done(ret, &ksig, 0); +- if (current->ptrace & PT_SINGLESTEP) ++ if (test_thread_flag(TIF_SINGLESTEP)) + task_pt_regs(current)->icountlevel = 1; + + return; +@@ -499,7 +499,7 @@ static void do_signal(struct pt_regs *re + /* If there's no signal to deliver, we just restore the saved mask. 
*/ + restore_saved_sigmask(); + +- if (current->ptrace & PT_SINGLESTEP) ++ if (test_thread_flag(TIF_SINGLESTEP)) + task_pt_regs(current)->icountlevel = 1; + return; + } +--- a/include/linux/ptrace.h ++++ b/include/linux/ptrace.h +@@ -46,12 +46,6 @@ extern int ptrace_access_vm(struct task_ + #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) + #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) + +-/* single stepping state bits (used on ARM and PA-RISC) */ +-#define PT_SINGLESTEP_BIT 31 +-#define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT) +-#define PT_BLOCKSTEP_BIT 30 +-#define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT) +- + extern long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data); diff --git a/queue-5.17/x86-kexec-fix-memory-leak-of-elf-header-buffer.patch b/queue-5.17/x86-kexec-fix-memory-leak-of-elf-header-buffer.patch new file mode 100644 --- /dev/null +++ b/queue-5.17/x86-kexec-fix-memory-leak-of-elf-header-buffer.patch +From b3e34a47f98974d0844444c5121aaff123004e57 Mon Sep 17 00:00:00 2001 +From: Baoquan He +Date: Wed, 23 Feb 2022 19:32:24 +0800 +Subject: x86/kexec: fix memory leak of elf header buffer + +From: Baoquan He + +commit b3e34a47f98974d0844444c5121aaff123004e57 upstream. + +This is reported by the kmemleak detector: + +unreferenced object 0xffffc900002a9000 (size 4096): + comm "kexec", pid 14950, jiffies 4295110793 (age 373.951s) + hex dump (first 32 bytes): + 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 .ELF............ + 04 00 3e 00 01 00 00 00 00 00 00 00 00 00 00 00 ..>............. + backtrace: + [<0000000016a8ef9f>] __vmalloc_node_range+0x101/0x170 + [<000000002b66b6c0>] __vmalloc_node+0xb4/0x160 + [<00000000ad40107d>] crash_prepare_elf64_headers+0x8e/0xcd0 + [<0000000019afff23>] crash_load_segments+0x260/0x470 + [<0000000019ebe95c>] bzImage64_load+0x814/0xad0 + [<0000000093e16b05>] arch_kexec_kernel_image_load+0x1be/0x2a0 + [<000000009ef2fc88>] kimage_file_alloc_init+0x2ec/0x5a0 + [<0000000038f5a97a>] __do_sys_kexec_file_load+0x28d/0x530 + [<0000000087c19992>] do_syscall_64+0x3b/0x90 + [<0000000066e063a4>] entry_SYSCALL_64_after_hwframe+0x44/0xae + +In crash_prepare_elf64_headers(), a buffer is allocated via vmalloc() to +store elf headers. However, it is not freed back to the system correctly when +the kdump kernel is reloaded or unloaded, so a memory leak is caused. Fix it +by introducing the x86-specific function arch_kimage_file_post_load_cleanup(), +and freeing the buffer there. + +Also remove the incorrect elf header buffer freeing code. Before +the arch-specific kexec_file loading function is called, the image instance has +been initialized, so 'image->elf_headers' must be NULL. It doesn't make +sense to free the elf header buffer there. + +Three different people have reported three bugs about the memory leak on +x86_64 inside Red Hat.
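Reduced to its essence, the bug is the classic "allocate on load, forget on reload" pattern. A stand-alone user-space model (illustrative only; the struct simply mirrors the two kimage fields the patch touches):

#include <stdlib.h>

struct kimage_model {
	void *elf_headers;
	size_t elf_headers_sz;
};

static void load(struct kimage_model *img)
{
	/* models crash_prepare_elf64_headers() allocating the buffer */
	img->elf_headers = malloc(4096);
	img->elf_headers_sz = 4096;
}

/* Models the fix: arch_kimage_file_post_load_cleanup() frees the buffer. */
static void cleanup(struct kimage_model *img)
{
	free(img->elf_headers);
	img->elf_headers = NULL;
	img->elf_headers_sz = 0;
}

int main(void)
{
	struct kimage_model img = { 0 };

	load(&img);    /* first kexec_file_load */
	cleanup(&img); /* unload/reload must free here, or the buffer leaks */
	load(&img);    /* a reload then gets a fresh buffer */
	cleanup(&img);
	return 0;
}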
+ +Link: https://lkml.kernel.org/r/20220223113225.63106-2-bhe@redhat.com +Signed-off-by: Baoquan He +Acked-by: Dave Young +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/machine_kexec_64.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/arch/x86/kernel/machine_kexec_64.c ++++ b/arch/x86/kernel/machine_kexec_64.c +@@ -374,9 +374,6 @@ void machine_kexec(struct kimage *image) + #ifdef CONFIG_KEXEC_FILE + void *arch_kexec_kernel_image_load(struct kimage *image) + { +- vfree(image->elf_headers); +- image->elf_headers = NULL; +- + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + +@@ -512,6 +509,15 @@ overflow: + (int)ELF64_R_TYPE(rel[i].r_info), value); + return -ENOEXEC; + } ++ ++int arch_kimage_file_post_load_cleanup(struct kimage *image) ++{ ++ vfree(image->elf_headers); ++ image->elf_headers = NULL; ++ image->elf_headers_sz = 0; ++ ++ return kexec_image_post_load_cleanup_default(image); ++} + #endif /* CONFIG_KEXEC_FILE */ + + static int diff --git a/queue-5.17/x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch b/queue-5.17/x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch new file mode 100644 index 00000000000..9fe20538e26 --- /dev/null +++ b/queue-5.17/x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch @@ -0,0 +1,100 @@ +From e5f28623ceb103e13fc3d7bd45edf9818b227fd0 Mon Sep 17 00:00:00 2001 +From: Ammar Faizi +Date: Tue, 29 Mar 2022 17:47:05 +0700 +Subject: x86/MCE/AMD: Fix memory leak when threshold_create_bank() fails + +From: Ammar Faizi + +commit e5f28623ceb103e13fc3d7bd45edf9818b227fd0 upstream. + +In mce_threshold_create_device(), if threshold_create_bank() fails, the +previously allocated threshold banks array @bp will be leaked because +the call to mce_threshold_remove_device() will not free it. + +This happens because mce_threshold_remove_device() fetches the pointer +through the threshold_banks per-CPU variable but bp is written there +only after the bank creation is successful, and not before, when +threshold_create_bank() fails. + +Add a helper which unwinds all the bank creation work previously done, +and pass the previously allocated threshold banks array into it for +freeing. + + [ bp: Massage. ]
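The helper is the standard partial-failure unwind pattern. As a stand-alone sketch with invented names (not the kernel code): create each bank in turn, and on the first failure free everything created so far, including the array itself, so nothing half-built is ever published.

#include <stdlib.h>

#define NUM_BANKS 8

static void **published_banks; /* models the threshold_banks per-CPU slot */

static void *create_bank(unsigned int bank)
{
	(void)bank;
	return malloc(64); /* stands in for threshold_create_bank() */
}

/* Models __threshold_remove_device(): free the banks, then the array. */
static void unwind_banks(void **bp, unsigned int created)
{
	for (unsigned int i = 0; i < created; i++)
		free(bp[i]);
	free(bp);
}

static int create_device(void)
{
	void **bp = calloc(NUM_BANKS, sizeof(*bp));

	if (!bp)
		return -1;

	for (unsigned int bank = 0; bank < NUM_BANKS; bank++) {
		bp[bank] = create_bank(bank);
		if (!bp[bank]) {
			unwind_banks(bp, bank); /* bp freed too: no leak */
			return -1;
		}
	}
	published_banks = bp; /* publish only after full success */
	return 0;
}

int main(void)
{
	if (create_device() == 0)
		unwind_banks(published_banks, NUM_BANKS); /* normal teardown */
	return 0;
}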
+ +Fixes: 6458de97fc15 ("x86/mce/amd: Straighten CPU hotplug path") +Co-developed-by: Alviro Iskandar Setiawan +Signed-off-by: Alviro Iskandar Setiawan +Co-developed-by: Yazen Ghannam +Signed-off-by: Yazen Ghannam +Signed-off-by: Ammar Faizi +Signed-off-by: Borislav Petkov +Cc: +Link: https://lore.kernel.org/r/20220329104705.65256-3-ammarfaizi2@gnuweeb.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/mce/amd.c | 32 +++++++++++++++++++------------- + 1 file changed, 19 insertions(+), 13 deletions(-) + +--- a/arch/x86/kernel/cpu/mce/amd.c ++++ b/arch/x86/kernel/cpu/mce/amd.c +@@ -1293,10 +1293,23 @@ out_free: + kfree(bank); + } + ++static void __threshold_remove_device(struct threshold_bank **bp) ++{ ++ unsigned int bank, numbanks = this_cpu_read(mce_num_banks); ++ ++ for (bank = 0; bank < numbanks; bank++) { ++ if (!bp[bank]) ++ continue; ++ ++ threshold_remove_bank(bp[bank]); ++ bp[bank] = NULL; ++ } ++ kfree(bp); ++} ++ + int mce_threshold_remove_device(unsigned int cpu) + { + struct threshold_bank **bp = this_cpu_read(threshold_banks); +- unsigned int bank, numbanks = this_cpu_read(mce_num_banks); + + if (!bp) + return 0; +@@ -1307,13 +1320,7 @@ int mce_threshold_remove_device(unsigned + */ + this_cpu_write(threshold_banks, NULL); + +- for (bank = 0; bank < numbanks; bank++) { +- if (bp[bank]) { +- threshold_remove_bank(bp[bank]); +- bp[bank] = NULL; +- } +- } +- kfree(bp); ++ __threshold_remove_device(bp); + return 0; + } + +@@ -1350,15 +1357,14 @@ int mce_threshold_create_device(unsigned + if (!(this_cpu_read(bank_map) & (1 << bank))) + continue; + err = threshold_create_bank(bp, cpu, bank); +- if (err) +- goto out_err; ++ if (err) { ++ __threshold_remove_device(bp); ++ return err; ++ } + } + this_cpu_write(threshold_banks, bp); + + if (thresholding_irq_en) + mce_threshold_vector = amd_threshold_interrupt; + return 0; +-out_err: +- mce_threshold_remove_device(cpu); +- return err; + } diff --git a/queue-5.17/x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch b/queue-5.17/x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch new file mode 100644 index 00000000000..ced210a5bdd --- /dev/null +++ b/queue-5.17/x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch @@ -0,0 +1,237 @@ +From 0c9782e204d3cc5625b9e8bf4e8625d38dfe0139 Mon Sep 17 00:00:00 2001 +From: Kristen Carlson Accardi +Date: Fri, 20 May 2022 10:42:47 -0700 +Subject: x86/sgx: Set active memcg prior to shmem allocation + +From: Kristen Carlson Accardi + +commit 0c9782e204d3cc5625b9e8bf4e8625d38dfe0139 upstream. + +When the system runs out of enclave memory, SGX can reclaim EPC pages +by swapping to normal RAM. These backing pages are allocated via a +per-enclave shared memory area. Since SGX allows unlimited +overcommit on EPC memory, the reclaimer thread can allocate a large +number of backing RAM pages in response to EPC memory pressure. + +When the shared memory backing RAM allocation occurs in +the reclaimer thread context, the shared memory is charged to +the root memory control group, and the shmem usage of the enclave +is not properly accounted for, making cgroups ineffective at +limiting the amount of RAM an enclave can consume. + +For example, when using a cgroup to launch a set of test +enclaves, the kernel does not properly account for 50% - 75% of +shmem page allocations on average. In the worst case, when +nearly all allocations occur during the reclaimer thread, the +kernel accounts less than a percent of the amount of shmem used +by the enclave's cgroup to the correct cgroup.
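The fix (further below) brackets the backing-store allocation with set_active_memcg() so the pages are charged to the enclave's owner. Here is that idiom in isolation, as a stand-alone sketch in which a plain string "account" stands in for a mem_cgroup (all names invented for illustration):

#include <stdio.h>

/* A string "account" stands in for a mem_cgroup; purely illustrative. */
static const char *active_account = "root";

static const char *set_active_account(const char *acct)
{
	const char *old = active_account;

	active_account = acct;
	return old;
}

static void alloc_backing_page(void)
{
	/* models a shmem allocation: charged to whichever account is active */
	printf("backing page charged to: %s\n", active_account);
}

int main(void)
{
	/* reclaimer context without the fix: the root account gets charged */
	alloc_backing_page();

	/* with the fix: charge the enclave owner's account, then restore */
	const char *old = set_active_account("enclave-owner");

	alloc_backing_page();
	set_active_account(old);
	return 0;
}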
+ +SGX stores a list of mm_structs that are associated with +an enclave. Pick one of them during reclaim and charge that +mm's memcg with the shmem allocation. The one that gets picked +is arbitrary, but this list almost always only has one mm. The +cases where there is more than one mm with different memcgs +are not worth considering. + +Create a new function - sgx_encl_alloc_backing(). This function +is used whenever a new backing storage page needs to be +allocated. Previously the same function was used for page +allocation as well as retrieving a previously allocated page. +Prior to backing page allocation, if there is an mm_struct associated +with the enclave that is requesting the allocation, it is set +as the active memory control group. + +[ dhansen: - fix merge conflict with ELDU fixes + - check against actual ksgxd_tsk, not ->mm ] + +Cc: stable@vger.kernel.org +Signed-off-by: Kristen Carlson Accardi +Signed-off-by: Dave Hansen +Reviewed-by: Shakeel Butt +Acked-by: Roman Gushchin +Link: https://lkml.kernel.org/r/20220520174248.4918-1-kristen@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/cpu/sgx/encl.c | 105 ++++++++++++++++++++++++++++++++++++++++- + arch/x86/kernel/cpu/sgx/encl.h | 7 +- + arch/x86/kernel/cpu/sgx/main.c | 9 ++- + 3 files changed, 115 insertions(+), 6 deletions(-) + +--- a/arch/x86/kernel/cpu/sgx/encl.c ++++ b/arch/x86/kernel/cpu/sgx/encl.c +@@ -152,7 +152,7 @@ static int __sgx_encl_eldu(struct sgx_en + + page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); + +- ret = sgx_encl_get_backing(encl, page_index, &b); ++ ret = sgx_encl_lookup_backing(encl, page_index, &b); + if (ret) + return ret; + +@@ -718,7 +718,7 @@ static struct page *sgx_encl_get_backing + * 0 on success, + * -errno otherwise. + */ +-int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, ++static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing) + { + pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); +@@ -743,6 +743,107 @@ int sgx_encl_get_backing(struct sgx_encl + return 0; + } + ++/* ++ * When called from ksgxd, returns the mem_cgroup of a struct mm stored ++ * in the enclave's mm_list. When not called from ksgxd, just returns ++ * the mem_cgroup of the current task. ++ */ ++static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl) ++{ ++ struct mem_cgroup *memcg = NULL; ++ struct sgx_encl_mm *encl_mm; ++ int idx; ++ ++ /* ++ * If called from normal task context, return the mem_cgroup ++ * of the current task's mm. The remainder of the handling is for ++ * ksgxd. ++ */ ++ if (!current_is_ksgxd()) ++ return get_mem_cgroup_from_mm(current->mm); ++ ++ /* ++ * Search the enclave's mm_list to find an mm associated with ++ * this enclave to charge the allocation to. ++ */ ++ idx = srcu_read_lock(&encl->srcu); ++ ++ list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { ++ if (!mmget_not_zero(encl_mm->mm)) ++ continue; ++ ++ memcg = get_mem_cgroup_from_mm(encl_mm->mm); ++ ++ mmput_async(encl_mm->mm); ++ ++ break; ++ } ++ ++ srcu_read_unlock(&encl->srcu, idx); ++ ++ /* ++ * In the rare case that there isn't an mm associated with ++ * the enclave, set memcg to the current active mem_cgroup. ++ * This will be the root mem_cgroup if there is no active ++ * mem_cgroup.
++ */ ++ if (!memcg) ++ return get_mem_cgroup_from_mm(NULL); ++ ++ return memcg; ++} ++ ++/** ++ * sgx_encl_alloc_backing() - allocate a new backing storage page ++ * @encl: an enclave pointer ++ * @page_index: enclave page index ++ * @backing: data for accessing backing storage for the page ++ * ++ * When called from ksgxd, sets the active memcg from one of the ++ * mms in the enclave's mm_list prior to any backing page allocation, ++ * in order to ensure that shmem page allocations are charged to the ++ * enclave. ++ * ++ * Return: ++ * 0 on success, ++ * -errno otherwise. ++ */ ++int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, ++ struct sgx_backing *backing) ++{ ++ struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl); ++ struct mem_cgroup *memcg = set_active_memcg(encl_memcg); ++ int ret; ++ ++ ret = sgx_encl_get_backing(encl, page_index, backing); ++ ++ set_active_memcg(memcg); ++ mem_cgroup_put(encl_memcg); ++ ++ return ret; ++} ++ ++/** ++ * sgx_encl_lookup_backing() - retrieve an existing backing storage page ++ * @encl: an enclave pointer ++ * @page_index: enclave page index ++ * @backing: data for accessing backing storage for the page ++ * ++ * Retrieve a backing page for loading data back into an EPC page with ELDU. ++ * It is the caller's responsibility to ensure that it is appropriate to use ++ * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is ++ * not used correctly, this will cause an allocation which is not accounted for. ++ * ++ * Return: ++ * 0 on success, ++ * -errno otherwise. ++ */ ++int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, ++ struct sgx_backing *backing) ++{ ++ return sgx_encl_get_backing(encl, page_index, backing); ++} ++ + /** + * sgx_encl_put_backing() - Unpin the backing storage + * @backing: data for accessing backing storage for the page +--- a/arch/x86/kernel/cpu/sgx/encl.h ++++ b/arch/x86/kernel/cpu/sgx/encl.h +@@ -103,10 +103,13 @@ static inline int sgx_encl_find(struct m + int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start, + unsigned long end, unsigned long vm_flags); + ++bool current_is_ksgxd(void); + void sgx_encl_release(struct kref *ref); + int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm); +-int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, +- struct sgx_backing *backing); ++int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, ++ struct sgx_backing *backing); ++int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, ++ struct sgx_backing *backing); + void sgx_encl_put_backing(struct sgx_backing *backing); + int sgx_encl_test_and_clear_young(struct mm_struct *mm, + struct sgx_encl_page *page); +--- a/arch/x86/kernel/cpu/sgx/main.c ++++ b/arch/x86/kernel/cpu/sgx/main.c +@@ -313,7 +313,7 @@ static void sgx_reclaimer_write(struct s + sgx_encl_put_backing(backing); + + if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) { +- ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size), ++ ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size), + &secs_backing); + if (ret) + goto out; +@@ -384,7 +384,7 @@ static void sgx_reclaim_pages(void) + page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); + + mutex_lock(&encl_page->encl->lock); +- ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); ++ ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]); + if (ret) { + mutex_unlock(&encl_page->encl->lock); + 
goto skip; +@@ -475,6 +475,11 @@ static bool __init sgx_page_reclaimer_in + return true; + } + ++bool current_is_ksgxd(void) ++{ ++ return current == ksgxd_tsk; ++} ++ + static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) + { + struct sgx_numa_node *node = &sgx_numa_nodes[nid];