git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 6 Jun 2022 09:43:17 +0000 (11:43 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 6 Jun 2022 09:43:17 +0000 (11:43 +0200)
added patches:
perf-x86-intel-fix-event-constraints-for-icl.patch
platform-x86-intel-hid-fix-_dsm-function-index-handling.patch
ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch
ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch
ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch
x86-kexec-fix-memory-leak-of-elf-header-buffer.patch
x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch
x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch

queue-5.15/perf-x86-intel-fix-event-constraints-for-icl.patch [new file with mode: 0644]
queue-5.15/platform-x86-intel-hid-fix-_dsm-function-index-handling.patch [new file with mode: 0644]
queue-5.15/ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch [new file with mode: 0644]
queue-5.15/ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch [new file with mode: 0644]
queue-5.15/ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/x86-kexec-fix-memory-leak-of-elf-header-buffer.patch [new file with mode: 0644]
queue-5.15/x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch [new file with mode: 0644]
queue-5.15/x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch [new file with mode: 0644]

diff --git a/queue-5.15/perf-x86-intel-fix-event-constraints-for-icl.patch b/queue-5.15/perf-x86-intel-fix-event-constraints-for-icl.patch
new file mode 100644 (file)
index 0000000..d0bc472
--- /dev/null
@@ -0,0 +1,35 @@
+From 86dca369075b3e310c3c0adb0f81e513c562b5e4 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Wed, 25 May 2022 06:39:52 -0700
+Subject: perf/x86/intel: Fix event constraints for ICL
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 86dca369075b3e310c3c0adb0f81e513c562b5e4 upstream.
+
+According to the latest event list, the events with encodings 0x55
+(INST_DECODED.DECODERS) and 0x56 (UOPS_DECODED.DEC0) are only available
+on the first four counters. Add them to the event constraints table.
+
+Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support")
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220525133952.1660658-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -255,7 +255,7 @@ static struct event_constraint intel_icl
+       INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
+       INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
+       INTEL_EVENT_CONSTRAINT(0x32, 0xf),      /* SW_PREFETCH_ACCESS.* */
+-      INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
++      INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf),
+       INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_TOTAL */
+       INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff),  /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */
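For reference, each entry in this table pairs an event-code range with a
bitmask of permitted counters, and 0xf (binary 1111) allows only the first
four general-purpose counters. A minimal standalone C sketch of that check
(the struct and helper here are illustrative, not the kernel's real data
structures):

#include <stdio.h>

/* Illustrative constraint: event codes 0x48-0x56 may only be scheduled
 * on counters 0-3, matching the 0xf counter mask after the fix. */
struct constraint { unsigned int lo, hi, cntmask; };

static const struct constraint icl_range = { 0x48, 0x56, 0xf };

static int counter_allowed(unsigned int event, unsigned int counter)
{
	if (event < icl_range.lo || event > icl_range.hi)
		return 1;	/* not constrained by this entry */
	return !!(icl_range.cntmask & (1u << counter));
}

int main(void)
{
	printf("0x55 on counter 3: %d\n", counter_allowed(0x55, 3));	/* 1 */
	printf("0x55 on counter 4: %d\n", counter_allowed(0x55, 4));	/* 0 */
	return 0;
}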
diff --git a/queue-5.15/platform-x86-intel-hid-fix-_dsm-function-index-handling.patch b/queue-5.15/platform-x86-intel-hid-fix-_dsm-function-index-handling.patch
new file mode 100644 (file)
index 0000000..3ae0dc5
--- /dev/null
@@ -0,0 +1,39 @@
+From 1620c80bba53af8c547bab34a1d3bc58319fe608 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= <linux@mniewoehner.de>
+Date: Tue, 17 May 2022 20:31:30 +0200
+Subject: platform/x86: intel-hid: fix _DSM function index handling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Michael Niewöhner <linux@mniewoehner.de>
+
+commit 1620c80bba53af8c547bab34a1d3bc58319fe608 upstream.
+
+intel_hid_dsm_fn_mask is a bit mask containing one bit for each function
+index. Fix the function index check in intel_hid_evaluate_method
+accordingly, which was missed in commit 97ab4516205e ("platform/x86:
+intel-hid: fix _DSM function index handling").
+
+Fixes: 97ab4516205e ("platform/x86: intel-hid: fix _DSM function index handling")
+Cc: stable@vger.kernel.org
+Signed-off-by: Michael Niewöhner <linux@mniewoehner.de>
+Link: https://lore.kernel.org/r/66f813f5bcc724a0f6dd5adefe6a9728dbe509e3.camel@mniewoehner.de
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/hid.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/platform/x86/intel/hid.c
++++ b/drivers/platform/x86/intel/hid.c
+@@ -245,7 +245,7 @@ static bool intel_hid_evaluate_method(ac
+       method_name = (char *)intel_hid_dsm_fn_to_method[fn_index];
+-      if (!(intel_hid_dsm_fn_mask & fn_index))
++      if (!(intel_hid_dsm_fn_mask & BIT(fn_index)))
+               goto skip_dsm_eval;
+       obj = acpi_evaluate_dsm_typed(handle, &intel_dsm_guid,
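To see why the unshifted check was wrong: intel_hid_dsm_fn_mask holds one
bit per function index, so the test must look at bit fn_index rather than
AND the index value itself into the mask. A small standalone sketch (the
mask value here is hypothetical):

#include <stdio.h>

#define BIT(n) (1u << (n))

int main(void)
{
	/* Suppose _DSM reports support for functions 1 and 3 only. */
	unsigned int fn_mask = BIT(1) | BIT(3);			/* 0b1010 */

	/* Buggy check: function 2 passes, since 0b1010 & 0b0010 != 0,
	 * even though BIT(2) is not set in the mask. */
	printf("buggy check, fn 2: %u\n", fn_mask & 2);		/* 2 */

	/* Fixed check: tests the bit at position fn_index. */
	printf("fixed check, fn 2: %u\n", fn_mask & BIT(2));	/* 0 */
	return 0;
}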
diff --git a/queue-5.15/ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch b/queue-5.15/ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch
new file mode 100644 (file)
index 0000000..4318d9e
--- /dev/null
@@ -0,0 +1,71 @@
+From 6a2d90ba027adba528509ffa27097cffd3879257 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 29 Apr 2022 09:23:55 -0500
+Subject: ptrace: Reimplement PTRACE_KILL by always sending SIGKILL
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 6a2d90ba027adba528509ffa27097cffd3879257 upstream.
+
+The current implementation of PTRACE_KILL is buggy and has been for
+many years, as it assumes its target has stopped in ptrace_stop.  At a
+quick skim it looks like this assumption has existed since ptrace
+support was added in Linux v1.0.
+
+While PTRACE_KILL has been deprecated, we cannot remove it, as a
+quick search with Google code search reveals many existing
+programs still calling it.
+
+When the ptracee is not stopped at ptrace_stop, some fields are set
+that are ignored everywhere except in ptrace_stop, making the
+userspace-visible behavior of PTRACE_KILL a no-op in those cases.
+
+As the usual rules are not obeyed, it is not clear what the
+consequences are of calling PTRACE_KILL on a running process.
+Presumably userspace does not do this, as it achieves nothing.
+
+Replace the implementation of PTRACE_KILL with a simple
+send_sig_info(SIGKILL) followed by a return 0.  This changes the
+observable user space behavior only in that PTRACE_KILL on a process
+not stopped in ptrace_stop will also kill it.  As that has always
+been the intent of the code, this seems like a reasonable change.
+
+Cc: stable@vger.kernel.org
+Reported-by: Al Viro <viro@zeniv.linux.org.uk>
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Tested-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lkml.kernel.org/r/20220505182645.497868-7-ebiederm@xmission.com
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/step.c |    3 +--
+ kernel/ptrace.c        |    5 ++---
+ 2 files changed, 3 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/step.c
++++ b/arch/x86/kernel/step.c
+@@ -180,8 +180,7 @@ void set_task_blockstep(struct task_stru
+        *
+        * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
+        * task is current or it can't be running, otherwise we can race
+-       * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
+-       * PTRACE_KILL is not safe.
++       * with __switch_to_xtra(). We rely on ptrace_freeze_traced().
+        */
+       local_irq_disable();
+       debugctl = get_debugctlmsr();
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -1238,9 +1238,8 @@ int ptrace_request(struct task_struct *c
+               return ptrace_resume(child, request, data);
+       case PTRACE_KILL:
+-              if (child->exit_state)  /* already dead */
+-                      return 0;
+-              return ptrace_resume(child, request, SIGKILL);
++              send_sig_info(SIGKILL, SEND_SIG_NOINFO, child);
++              return 0;
+ #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+       case PTRACE_GETREGSET:
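Seen from userspace, the change makes PTRACE_KILL behave like a plain
SIGKILL regardless of the tracee's stop state. An illustrative usage
sketch (not part of the patch; kill_tracee() is a hypothetical helper):

#include <signal.h>
#include <sys/ptrace.h>
#include <sys/types.h>

/* After this patch, the PTRACE_KILL path reliably kills the tracee;
 * before it, the request was a no-op unless the tracee happened to be
 * stopped in ptrace_stop, so the kill() fallback was the safe option. */
static void kill_tracee(pid_t pid)
{
	if (ptrace(PTRACE_KILL, pid, 0, 0) == -1)
		kill(pid, SIGKILL);
}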
diff --git a/queue-5.15/ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch b/queue-5.15/ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch
new file mode 100644 (file)
index 0000000..4cb978e
--- /dev/null
@@ -0,0 +1,140 @@
+From c200e4bb44e80b343c09841e7caaaca0aac5e5fa Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 26 Apr 2022 16:30:17 -0500
+Subject: ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit c200e4bb44e80b343c09841e7caaaca0aac5e5fa upstream.
+
+User Mode Linux is the last user of the PT_DTRACE flag.  Using the flag
+to indicate single stepping is a little confusing, and worse, changing
+tsk->ptrace without locking could potentially cause problems.
+
+So use a thread info flag with a better name instead of a flag in tsk->ptrace.
+
+Remove the definition of PT_DTRACE, as UML was its last user.
+
+Cc: stable@vger.kernel.org
+Acked-by: Johannes Berg <johannes@sipsolutions.net>
+Tested-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lkml.kernel.org/r/20220505182645.497868-3-ebiederm@xmission.com
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/um/include/asm/thread_info.h |    2 ++
+ arch/um/kernel/exec.c             |    2 +-
+ arch/um/kernel/process.c          |    2 +-
+ arch/um/kernel/ptrace.c           |    8 ++++----
+ arch/um/kernel/signal.c           |    4 ++--
+ include/linux/ptrace.h            |    1 -
+ 6 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/arch/um/include/asm/thread_info.h
++++ b/arch/um/include/asm/thread_info.h
+@@ -64,6 +64,7 @@ static inline struct thread_info *curren
+ #define TIF_RESTORE_SIGMASK   7
+ #define TIF_NOTIFY_RESUME     8
+ #define TIF_SECCOMP           9       /* secure computing */
++#define TIF_SINGLESTEP                10      /* single stepping userspace */
+ #define _TIF_SYSCALL_TRACE    (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING               (1 << TIF_SIGPENDING)
+@@ -72,5 +73,6 @@ static inline struct thread_info *curren
+ #define _TIF_MEMDIE           (1 << TIF_MEMDIE)
+ #define _TIF_SYSCALL_AUDIT    (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP          (1 << TIF_SECCOMP)
++#define _TIF_SINGLESTEP               (1 << TIF_SINGLESTEP)
+ #endif
+--- a/arch/um/kernel/exec.c
++++ b/arch/um/kernel/exec.c
+@@ -42,7 +42,7 @@ void start_thread(struct pt_regs *regs,
+ {
+       PT_REGS_IP(regs) = eip;
+       PT_REGS_SP(regs) = esp;
+-      current->ptrace &= ~PT_DTRACE;
++      clear_thread_flag(TIF_SINGLESTEP);
+ #ifdef SUBARCH_EXECVE1
+       SUBARCH_EXECVE1(regs->regs);
+ #endif
+--- a/arch/um/kernel/process.c
++++ b/arch/um/kernel/process.c
+@@ -339,7 +339,7 @@ int singlestepping(void * t)
+ {
+       struct task_struct *task = t ? t : current;
+-      if (!(task->ptrace & PT_DTRACE))
++      if (!test_thread_flag(TIF_SINGLESTEP))
+               return 0;
+       if (task->thread.singlestep_syscall)
+--- a/arch/um/kernel/ptrace.c
++++ b/arch/um/kernel/ptrace.c
+@@ -12,7 +12,7 @@
+ void user_enable_single_step(struct task_struct *child)
+ {
+-      child->ptrace |= PT_DTRACE;
++      set_tsk_thread_flag(child, TIF_SINGLESTEP);
+       child->thread.singlestep_syscall = 0;
+ #ifdef SUBARCH_SET_SINGLESTEPPING
+@@ -22,7 +22,7 @@ void user_enable_single_step(struct task
+ void user_disable_single_step(struct task_struct *child)
+ {
+-      child->ptrace &= ~PT_DTRACE;
++      clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+       child->thread.singlestep_syscall = 0;
+ #ifdef SUBARCH_SET_SINGLESTEPPING
+@@ -121,7 +121,7 @@ static void send_sigtrap(struct uml_pt_r
+ }
+ /*
+- * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
++ * XXX Check TIF_SINGLESTEP for singlestepping check and
+  * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
+  */
+ int syscall_trace_enter(struct pt_regs *regs)
+@@ -145,7 +145,7 @@ void syscall_trace_leave(struct pt_regs
+       audit_syscall_exit(regs);
+       /* Fake a debug trap */
+-      if (ptraced & PT_DTRACE)
++      if (test_thread_flag(TIF_SINGLESTEP))
+               send_sigtrap(&regs->regs, 0);
+       if (!test_thread_flag(TIF_SYSCALL_TRACE))
+--- a/arch/um/kernel/signal.c
++++ b/arch/um/kernel/signal.c
+@@ -53,7 +53,7 @@ static void handle_signal(struct ksignal
+       unsigned long sp;
+       int err;
+-      if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
++      if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED))
+               singlestep = 1;
+       /* Did we come from a system call? */
+@@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs)
+        * on the host.  The tracing thread will check this flag and
+        * PTRACE_SYSCALL if necessary.
+        */
+-      if (current->ptrace & PT_DTRACE)
++      if (test_thread_flag(TIF_SINGLESTEP))
+               current->thread.singlestep_syscall =
+                       is_syscall(PT_REGS_IP(&current->thread.regs));
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_
+ #define PT_SEIZED     0x00010000      /* SEIZE used, enable new behavior */
+ #define PT_PTRACED    0x00000001
+-#define PT_DTRACE     0x00000002      /* delayed trace (used on m68k, i386) */
+ #define PT_OPT_FLAG_SHIFT     3
+ /* PT_TRACE_* event enable flags */
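The locking point in the message is that tsk->ptrace updates are plain
read-modify-write, while thread-info flags are set with atomic bit
operations. A standalone C11 sketch of the difference (variable names are
illustrative; in the kernel, set_tsk_thread_flag() uses atomic set_bit()):

#include <stdatomic.h>

static unsigned long ptrace_flags;		/* plain word */
static _Atomic unsigned long ti_flags;		/* atomic word */

/* Racy without a lock: load, OR, and store are separate steps, so two
 * concurrent callers can lose one of the updates. */
static void set_pt_flag(unsigned long bit)
{
	ptrace_flags |= bit;
}

/* Safe without a lock: a single atomic read-modify-write, which is the
 * guarantee set_bit() provides for thread-info flags. */
static void set_ti_flag(unsigned long bit)
{
	atomic_fetch_or(&ti_flags, bit);
}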
diff --git a/queue-5.15/ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch b/queue-5.15/ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch
new file mode 100644 (file)
index 0000000..d8b6de9
--- /dev/null
@@ -0,0 +1,83 @@
+From 4a3d2717d140401df7501a95e454180831a0c5af Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 26 Apr 2022 16:45:37 -0500
+Subject: ptrace/xtensa: Replace PT_SINGLESTEP with TIF_SINGLESTEP
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 4a3d2717d140401df7501a95e454180831a0c5af upstream.
+
+xtensa is the last user of the PT_SINGLESTEP flag.  Changing tsk->ptrace in
+user_enable_single_step and user_disable_single_step without locking could
+potentially cause problems.
+
+So use a thread info flag instead of a flag in tsk->ptrace.  Use TIF_SINGLESTEP,
+which xtensa already had defined but left unused.
+
+Remove the definitions of PT_SINGLESTEP and PT_BLOCKSTEP as they have no more users.
+
+Cc: stable@vger.kernel.org
+Acked-by: Max Filippov <jcmvbkbc@gmail.com>
+Tested-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lkml.kernel.org/r/20220505182645.497868-4-ebiederm@xmission.com
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/xtensa/kernel/ptrace.c |    4 ++--
+ arch/xtensa/kernel/signal.c |    4 ++--
+ include/linux/ptrace.h      |    6 ------
+ 3 files changed, 4 insertions(+), 10 deletions(-)
+
+--- a/arch/xtensa/kernel/ptrace.c
++++ b/arch/xtensa/kernel/ptrace.c
+@@ -226,12 +226,12 @@ const struct user_regset_view *task_user
+ void user_enable_single_step(struct task_struct *child)
+ {
+-      child->ptrace |= PT_SINGLESTEP;
++      set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ }
+ void user_disable_single_step(struct task_struct *child)
+ {
+-      child->ptrace &= ~PT_SINGLESTEP;
++      clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ }
+ /*
+--- a/arch/xtensa/kernel/signal.c
++++ b/arch/xtensa/kernel/signal.c
+@@ -465,7 +465,7 @@ static void do_signal(struct pt_regs *re
+               /* Set up the stack frame */
+               ret = setup_frame(&ksig, sigmask_to_save(), regs);
+               signal_setup_done(ret, &ksig, 0);
+-              if (current->ptrace & PT_SINGLESTEP)
++              if (test_thread_flag(TIF_SINGLESTEP))
+                       task_pt_regs(current)->icountlevel = 1;
+               return;
+@@ -491,7 +491,7 @@ static void do_signal(struct pt_regs *re
+       /* If there's no signal to deliver, we just restore the saved mask.  */
+       restore_saved_sigmask();
+-      if (current->ptrace & PT_SINGLESTEP)
++      if (test_thread_flag(TIF_SINGLESTEP))
+               task_pt_regs(current)->icountlevel = 1;
+       return;
+ }
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -46,12 +46,6 @@ extern int ptrace_access_vm(struct task_
+ #define PT_EXITKILL           (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
+ #define PT_SUSPEND_SECCOMP    (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
+-/* single stepping state bits (used on ARM and PA-RISC) */
+-#define PT_SINGLESTEP_BIT     31
+-#define PT_SINGLESTEP         (1<<PT_SINGLESTEP_BIT)
+-#define PT_BLOCKSTEP_BIT      30
+-#define PT_BLOCKSTEP          (1<<PT_BLOCKSTEP_BIT)
+-
+ extern long arch_ptrace(struct task_struct *child, long request,
+                       unsigned long addr, unsigned long data);
+ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
diff --git a/queue-5.15/series b/queue-5.15/series
index 6db80270b2adfb14c87b098c3726e6f8713d1eb3..1f4513178657e187b33b3fe9578e3ad8ec010f4b 100644 (file)
@@ -28,3 +28,11 @@ fs-ntfs3-restore-ntfs_xattr_get_acl-and-ntfs_xattr_set_acl-functions.patch
 cifs-fix-potential-double-free-during-failed-mount.patch
 cifs-when-extending-a-file-with-falloc-we-should-make-files-not-sparse.patch
 xhci-allow-host-runtime-pm-as-default-for-intel-alder-lake-n-xhci.patch
+platform-x86-intel-hid-fix-_dsm-function-index-handling.patch
+x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch
+perf-x86-intel-fix-event-constraints-for-icl.patch
+x86-kexec-fix-memory-leak-of-elf-header-buffer.patch
+x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch
+ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch
+ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch
+ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch
diff --git a/queue-5.15/x86-kexec-fix-memory-leak-of-elf-header-buffer.patch b/queue-5.15/x86-kexec-fix-memory-leak-of-elf-header-buffer.patch
new file mode 100644 (file)
index 0000000..c5cee6d
--- /dev/null
@@ -0,0 +1,80 @@
+From b3e34a47f98974d0844444c5121aaff123004e57 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Wed, 23 Feb 2022 19:32:24 +0800
+Subject: x86/kexec: fix memory leak of elf header buffer
+
+From: Baoquan He <bhe@redhat.com>
+
+commit b3e34a47f98974d0844444c5121aaff123004e57 upstream.
+
+This leak is reported by the kmemleak detector:
+
+unreferenced object 0xffffc900002a9000 (size 4096):
+  comm "kexec", pid 14950, jiffies 4295110793 (age 373.951s)
+  hex dump (first 32 bytes):
+    7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00  .ELF............
+    04 00 3e 00 01 00 00 00 00 00 00 00 00 00 00 00  ..>.............
+  backtrace:
+    [<0000000016a8ef9f>] __vmalloc_node_range+0x101/0x170
+    [<000000002b66b6c0>] __vmalloc_node+0xb4/0x160
+    [<00000000ad40107d>] crash_prepare_elf64_headers+0x8e/0xcd0
+    [<0000000019afff23>] crash_load_segments+0x260/0x470
+    [<0000000019ebe95c>] bzImage64_load+0x814/0xad0
+    [<0000000093e16b05>] arch_kexec_kernel_image_load+0x1be/0x2a0
+    [<000000009ef2fc88>] kimage_file_alloc_init+0x2ec/0x5a0
+    [<0000000038f5a97a>] __do_sys_kexec_file_load+0x28d/0x530
+    [<0000000087c19992>] do_syscall_64+0x3b/0x90
+    [<0000000066e063a4>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+In crash_prepare_elf64_headers(), a buffer is allocated via vmalloc() to
+store the ELF headers.  However, it is not freed back to the system when
+the kdump kernel is reloaded or unloaded, causing a memory leak.  Fix it
+by introducing the x86-specific function arch_kimage_file_post_load_cleanup()
+and freeing the buffer there.
+
+Also remove the incorrect ELF header buffer freeing code.  Before the
+arch-specific kexec_file loading function is called, the image instance
+has just been initialized, so 'image->elf_headers' must be NULL.  It makes
+no sense to free the ELF header buffer at that point.
+
+Three different people have reported three bugs about this memory leak
+on x86_64 inside Red Hat.
+
+Link: https://lkml.kernel.org/r/20220223113225.63106-2-bhe@redhat.com
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Acked-by: Dave Young <dyoung@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/machine_kexec_64.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/machine_kexec_64.c
++++ b/arch/x86/kernel/machine_kexec_64.c
+@@ -373,9 +373,6 @@ void machine_kexec(struct kimage *image)
+ #ifdef CONFIG_KEXEC_FILE
+ void *arch_kexec_kernel_image_load(struct kimage *image)
+ {
+-      vfree(image->elf_headers);
+-      image->elf_headers = NULL;
+-
+       if (!image->fops || !image->fops->load)
+               return ERR_PTR(-ENOEXEC);
+@@ -511,6 +508,15 @@ overflow:
+              (int)ELF64_R_TYPE(rel[i].r_info), value);
+       return -ENOEXEC;
+ }
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image)
++{
++      vfree(image->elf_headers);
++      image->elf_headers = NULL;
++      image->elf_headers_sz = 0;
++
++      return kexec_image_post_load_cleanup_default(image);
++}
+ #endif /* CONFIG_KEXEC_FILE */
+ static int
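The shape of the fix is a standard lifetime correction: the buffer must be
released in the teardown hook paired with the load, not at the start of
the next load. A minimal userspace analogy (hypothetical struct, with
free() standing in for vfree()):

#include <stdlib.h>

struct image { void *elf_headers; size_t elf_headers_sz; };

/* Mirrors arch_kimage_file_post_load_cleanup(): releasing here covers
 * both reload and unload, whereas freeing at the top of the next load
 * leaked the final load's buffer. */
static void image_post_load_cleanup(struct image *im)
{
	free(im->elf_headers);
	im->elf_headers = NULL;
	im->elf_headers_sz = 0;
}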
diff --git a/queue-5.15/x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch b/queue-5.15/x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch
new file mode 100644 (file)
index 0000000..5204f5c
--- /dev/null
@@ -0,0 +1,100 @@
+From e5f28623ceb103e13fc3d7bd45edf9818b227fd0 Mon Sep 17 00:00:00 2001
+From: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+Date: Tue, 29 Mar 2022 17:47:05 +0700
+Subject: x86/MCE/AMD: Fix memory leak when threshold_create_bank() fails
+
+From: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+
+commit e5f28623ceb103e13fc3d7bd45edf9818b227fd0 upstream.
+
+In mce_threshold_create_device(), if threshold_create_bank() fails, the
+previously allocated threshold banks array @bp will be leaked because
+the call to mce_threshold_remove_device() will not free it.
+
+This happens because mce_threshold_remove_device() fetches the pointer
+through the threshold_banks per-CPU variable but bp is written there
+only after the bank creation is successful, and not before, when
+threshold_create_bank() fails.
+
+Add a helper which unwinds all the bank creation work previously done
+and pass into it the previously allocated threshold banks array for
+freeing.
+
+  [ bp: Massage. ]
+
+Fixes: 6458de97fc15 ("x86/mce/amd: Straighten CPU hotplug path")
+Co-developed-by: Alviro Iskandar Setiawan <alviro.iskandar@gnuweeb.org>
+Signed-off-by: Alviro Iskandar Setiawan <alviro.iskandar@gnuweeb.org>
+Co-developed-by: Yazen Ghannam <yazen.ghannam@amd.com>
+Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
+Signed-off-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220329104705.65256-3-ammarfaizi2@gnuweeb.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mce/amd.c |   32 +++++++++++++++++++-------------
+ 1 file changed, 19 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mce/amd.c
++++ b/arch/x86/kernel/cpu/mce/amd.c
+@@ -1470,10 +1470,23 @@ out_free:
+       kfree(bank);
+ }
++static void __threshold_remove_device(struct threshold_bank **bp)
++{
++      unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
++
++      for (bank = 0; bank < numbanks; bank++) {
++              if (!bp[bank])
++                      continue;
++
++              threshold_remove_bank(bp[bank]);
++              bp[bank] = NULL;
++      }
++      kfree(bp);
++}
++
+ int mce_threshold_remove_device(unsigned int cpu)
+ {
+       struct threshold_bank **bp = this_cpu_read(threshold_banks);
+-      unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
+       if (!bp)
+               return 0;
+@@ -1484,13 +1497,7 @@ int mce_threshold_remove_device(unsigned
+        */
+       this_cpu_write(threshold_banks, NULL);
+-      for (bank = 0; bank < numbanks; bank++) {
+-              if (bp[bank]) {
+-                      threshold_remove_bank(bp[bank]);
+-                      bp[bank] = NULL;
+-              }
+-      }
+-      kfree(bp);
++      __threshold_remove_device(bp);
+       return 0;
+ }
+@@ -1527,15 +1534,14 @@ int mce_threshold_create_device(unsigned
+               if (!(this_cpu_read(bank_map) & (1 << bank)))
+                       continue;
+               err = threshold_create_bank(bp, cpu, bank);
+-              if (err)
+-                      goto out_err;
++              if (err) {
++                      __threshold_remove_device(bp);
++                      return err;
++              }
+       }
+       this_cpu_write(threshold_banks, bp);
+       if (thresholding_irq_en)
+               mce_threshold_vector = amd_threshold_interrupt;
+       return 0;
+-out_err:
+-      mce_threshold_remove_device(cpu);
+-      return err;
+ }
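The underlying pattern is "publish only after full success": bp is written
to the per-CPU threshold_banks variable only once every bank is created,
so the error path has to unwind with the local array it still holds. A
standalone sketch of that allocate/unwind shape (hypothetical types, not
the MCE code itself):

#include <stdlib.h>

struct bank { int id; };

/* Analogue of __threshold_remove_device(): takes the array directly
 * instead of re-reading a pointer that was never published. */
static void destroy_banks(struct bank **bp, unsigned int n)
{
	for (unsigned int i = 0; i < n; i++)
		free(bp[i]);			/* free(NULL) is a no-op */
	free(bp);
}

static struct bank **create_banks(unsigned int n)
{
	struct bank **bp = calloc(n, sizeof(*bp));

	if (!bp)
		return NULL;
	for (unsigned int i = 0; i < n; i++) {
		bp[i] = malloc(sizeof(*bp[i]));
		if (!bp[i]) {
			destroy_banks(bp, n);	/* unwind everything created */
			return NULL;
		}
		bp[i]->id = (int)i;
	}
	return bp;		/* publish to readers only now */
}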
diff --git a/queue-5.15/x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch b/queue-5.15/x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch
new file mode 100644 (file)
index 0000000..1437496
--- /dev/null
@@ -0,0 +1,237 @@
+From 0c9782e204d3cc5625b9e8bf4e8625d38dfe0139 Mon Sep 17 00:00:00 2001
+From: Kristen Carlson Accardi <kristen@linux.intel.com>
+Date: Fri, 20 May 2022 10:42:47 -0700
+Subject: x86/sgx: Set active memcg prior to shmem allocation
+
+From: Kristen Carlson Accardi <kristen@linux.intel.com>
+
+commit 0c9782e204d3cc5625b9e8bf4e8625d38dfe0139 upstream.
+
+When the system runs out of enclave memory, SGX can reclaim EPC pages
+by swapping to normal RAM. These backing pages are allocated via a
+per-enclave shared memory area. Since SGX allows unlimited
+overcommit on EPC memory, the reclaimer thread can allocate a large
+number of backing RAM pages in response to EPC memory pressure.
+
+When the shared memory backing RAM allocation occurs in the
+reclaimer thread's context, the shared memory is charged to
+the root memory control group, and the shmem usage of the enclave
+is not properly accounted for, making cgroups ineffective at
+limiting the amount of RAM an enclave can consume.
+
+For example, when using a cgroup to launch a set of test
+enclaves, the kernel does not properly account for 50%-75% of
+shmem page allocations on average.  In the worst case, when
+nearly all allocations occur in the reclaimer thread, the
+kernel charges less than one percent of the enclave's shmem
+usage to the correct cgroup.
+
+SGX stores a list of mm_structs that are associated with
+an enclave. Pick one of them during reclaim and charge that
+mm's memcg with the shmem allocation. The one that gets picked
+is arbitrary, but this list almost always only has one mm. The
+cases where there is more than one mm with different memcgs
+are not worth considering.
+
+Create a new function, sgx_encl_alloc_backing(), which is used
+whenever a new backing storage page needs to be allocated.
+Previously the same function was used both for page allocation
+and for retrieving a previously allocated page.  Prior to backing
+page allocation, if there is an mm_struct associated with the
+enclave requesting the allocation, its memcg is set as the active
+memory control group.
+
+[ dhansen: - fix merge conflict with ELDU fixes
+           - check against actual ksgxd_tsk, not ->mm ]
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
+Link: https://lkml.kernel.org/r/20220520174248.4918-1-kristen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/sgx/encl.c |  105 ++++++++++++++++++++++++++++++++++++++++-
+ arch/x86/kernel/cpu/sgx/encl.h |    7 +-
+ arch/x86/kernel/cpu/sgx/main.c |    9 ++-
+ 3 files changed, 115 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/cpu/sgx/encl.c
++++ b/arch/x86/kernel/cpu/sgx/encl.c
+@@ -152,7 +152,7 @@ static int __sgx_encl_eldu(struct sgx_en
+       page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+-      ret = sgx_encl_get_backing(encl, page_index, &b);
++      ret = sgx_encl_lookup_backing(encl, page_index, &b);
+       if (ret)
+               return ret;
+@@ -718,7 +718,7 @@ static struct page *sgx_encl_get_backing
+  *   0 on success,
+  *   -errno otherwise.
+  */
+-int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
++static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+                        struct sgx_backing *backing)
+ {
+       pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+@@ -743,6 +743,107 @@ int sgx_encl_get_backing(struct sgx_encl
+       return 0;
+ }
++/*
++ * When called from ksgxd, returns the mem_cgroup of a struct mm stored
++ * in the enclave's mm_list. When not called from ksgxd, just returns
++ * the mem_cgroup of the current task.
++ */
++static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
++{
++      struct mem_cgroup *memcg = NULL;
++      struct sgx_encl_mm *encl_mm;
++      int idx;
++
++      /*
++       * If called from normal task context, return the mem_cgroup
++       * of the current task's mm. The remainder of the handling is for
++       * ksgxd.
++       */
++      if (!current_is_ksgxd())
++              return get_mem_cgroup_from_mm(current->mm);
++
++      /*
++       * Search the enclave's mm_list to find an mm associated with
++       * this enclave to charge the allocation to.
++       */
++      idx = srcu_read_lock(&encl->srcu);
++
++      list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
++              if (!mmget_not_zero(encl_mm->mm))
++                      continue;
++
++              memcg = get_mem_cgroup_from_mm(encl_mm->mm);
++
++              mmput_async(encl_mm->mm);
++
++              break;
++      }
++
++      srcu_read_unlock(&encl->srcu, idx);
++
++      /*
++       * In the rare case that there isn't an mm associated with
++       * the enclave, set memcg to the current active mem_cgroup.
++       * This will be the root mem_cgroup if there is no active
++       * mem_cgroup.
++       */
++      if (!memcg)
++              return get_mem_cgroup_from_mm(NULL);
++
++      return memcg;
++}
++
++/**
++ * sgx_encl_alloc_backing() - allocate a new backing storage page
++ * @encl:     an enclave pointer
++ * @page_index:       enclave page index
++ * @backing:  data for accessing backing storage for the page
++ *
++ * When called from ksgxd, sets the active memcg from one of the
++ * mms in the enclave's mm_list prior to any backing page allocation,
++ * in order to ensure that shmem page allocations are charged to the
++ * enclave.
++ *
++ * Return:
++ *   0 on success,
++ *   -errno otherwise.
++ */
++int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
++                         struct sgx_backing *backing)
++{
++      struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
++      struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
++      int ret;
++
++      ret = sgx_encl_get_backing(encl, page_index, backing);
++
++      set_active_memcg(memcg);
++      mem_cgroup_put(encl_memcg);
++
++      return ret;
++}
++
++/**
++ * sgx_encl_lookup_backing() - retrieve an existing backing storage page
++ * @encl:     an enclave pointer
++ * @page_index:       enclave page index
++ * @backing:  data for accessing backing storage for the page
++ *
++ * Retrieve a backing page for loading data back into an EPC page with ELDU.
++ * It is the caller's responsibility to ensure that it is appropriate to use
++ * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
++ * not used correctly, this will cause an allocation which is not accounted for.
++ *
++ * Return:
++ *   0 on success,
++ *   -errno otherwise.
++ */
++int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
++                         struct sgx_backing *backing)
++{
++      return sgx_encl_get_backing(encl, page_index, backing);
++}
++
+ /**
+  * sgx_encl_put_backing() - Unpin the backing storage
+  * @backing:  data for accessing backing storage for the page
+--- a/arch/x86/kernel/cpu/sgx/encl.h
++++ b/arch/x86/kernel/cpu/sgx/encl.h
+@@ -103,10 +103,13 @@ static inline int sgx_encl_find(struct m
+ int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
+                    unsigned long end, unsigned long vm_flags);
++bool current_is_ksgxd(void);
+ void sgx_encl_release(struct kref *ref);
+ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
+-int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+-                       struct sgx_backing *backing);
++int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
++                          struct sgx_backing *backing);
++int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
++                         struct sgx_backing *backing);
+ void sgx_encl_put_backing(struct sgx_backing *backing);
+ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
+                                 struct sgx_encl_page *page);
+--- a/arch/x86/kernel/cpu/sgx/main.c
++++ b/arch/x86/kernel/cpu/sgx/main.c
+@@ -292,7 +292,7 @@ static void sgx_reclaimer_write(struct s
+       sgx_encl_put_backing(backing);
+       if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
+-              ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
++              ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size),
+                                          &secs_backing);
+               if (ret)
+                       goto out;
+@@ -365,7 +365,7 @@ static void sgx_reclaim_pages(void)
+               page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
+               mutex_lock(&encl_page->encl->lock);
+-              ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
++              ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]);
+               if (ret) {
+                       mutex_unlock(&encl_page->encl->lock);
+                       goto skip;
+@@ -462,6 +462,11 @@ static bool __init sgx_page_reclaimer_in
+       return true;
+ }
++bool current_is_ksgxd(void)
++{
++      return current == ksgxd_tsk;
++}
++
+ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
+ {
+       struct sgx_numa_node *node = &sgx_numa_nodes[nid];
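Condensing the hunks above, the heart of the fix is a save/switch/restore
around the allocation (same calls as in sgx_encl_alloc_backing(), with
surrounding context trimmed):

	struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
	struct mem_cgroup *memcg = set_active_memcg(encl_memcg);

	ret = sgx_encl_get_backing(encl, page_index, backing);	/* shmem alloc */

	set_active_memcg(memcg);	/* restore the previous active memcg */
	mem_cgroup_put(encl_memcg);	/* drop the reference taken above */

The restore and put run unconditionally, so the active memcg is correct
again even when the allocation fails.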