--- /dev/null
+From 86dca369075b3e310c3c0adb0f81e513c562b5e4 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Wed, 25 May 2022 06:39:52 -0700
+Subject: perf/x86/intel: Fix event constraints for ICL
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 86dca369075b3e310c3c0adb0f81e513c562b5e4 upstream.
+
+According to the latest event list, the event encodings 0x55
+(INST_DECODED.DECODERS) and 0x56 (UOPS_DECODED.DEC0) are only available
+on the first 4 counters. Add them to the event constraints table.
+
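+For context, the last argument of these constraint macros is a bitmap of
+allowed general-purpose counters: 0xf = 0b1111, i.e. counters 0-3, which
+matches the "first 4 counters" restriction above.
+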
+Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support")
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220525133952.1660658-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -255,7 +255,7 @@ static struct event_constraint intel_icl
+ INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
+ INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
+- INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
++ INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */
+ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */
--- /dev/null
+From 1620c80bba53af8c547bab34a1d3bc58319fe608 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= <linux@mniewoehner.de>
+Date: Tue, 17 May 2022 20:31:30 +0200
+Subject: platform/x86: intel-hid: fix _DSM function index handling
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Michael Niewöhner <linux@mniewoehner.de>
+
+commit 1620c80bba53af8c547bab34a1d3bc58319fe608 upstream.
+
+intel_hid_dsm_fn_mask is a bit mask containing one bit for each function
+index. Fix the function index check in intel_hid_evaluate_method()
+accordingly; it was missed in commit 97ab4516205e ("platform/x86:
+intel-hid: fix _DSM function index handling").
+
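+Purely as an illustration (not part of the upstream change), a minimal
+userspace sketch of why the old test is wrong, using a hypothetical
+mask value:
+
+  #include <stdio.h>
+
+  #define BIT(n) (1u << (n))
+
+  int main(void)
+  {
+          /* hypothetical: only _DSM functions 1 and 2 are supported */
+          unsigned int fn_mask = BIT(1) | BIT(2);   /* == 0x6 */
+          unsigned int fn_index = 4;                /* unsupported index */
+
+          /* old check: 0x6 & 4 is non-zero, so the unsupported index passes */
+          printf("old: %s\n", (fn_mask & fn_index) ? "evaluate" : "skip");
+
+          /* fixed check: 0x6 & BIT(4) is zero, so it is correctly skipped */
+          printf("new: %s\n", (fn_mask & BIT(fn_index)) ? "evaluate" : "skip");
+          return 0;
+  }
+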
+Fixes: 97ab4516205e ("platform/x86: intel-hid: fix _DSM function index handling")
+Cc: stable@vger.kernel.org
+Signed-off-by: Michael Niewöhner <linux@mniewoehner.de>
+Link: https://lore.kernel.org/r/66f813f5bcc724a0f6dd5adefe6a9728dbe509e3.camel@mniewoehner.de
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/x86/intel/hid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/platform/x86/intel/hid.c
++++ b/drivers/platform/x86/intel/hid.c
+@@ -245,7 +245,7 @@ static bool intel_hid_evaluate_method(ac
+
+ method_name = (char *)intel_hid_dsm_fn_to_method[fn_index];
+
+- if (!(intel_hid_dsm_fn_mask & fn_index))
++ if (!(intel_hid_dsm_fn_mask & BIT(fn_index)))
+ goto skip_dsm_eval;
+
+ obj = acpi_evaluate_dsm_typed(handle, &intel_dsm_guid,
--- /dev/null
+From 6a2d90ba027adba528509ffa27097cffd3879257 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 29 Apr 2022 09:23:55 -0500
+Subject: ptrace: Reimplement PTRACE_KILL by always sending SIGKILL
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 6a2d90ba027adba528509ffa27097cffd3879257 upstream.
+
+The current implementation of PTRACE_KILL is buggy and has been for
+many years, as it assumes its target has stopped in ptrace_stop. At a
+quick skim it looks like this assumption has existed since ptrace
+support was added in Linux v1.0.
+
+While PTRACE_KILL has been deprecated, we cannot remove it, as a quick
+search with Google Code Search reveals many existing programs calling
+it.
+
+When the ptracee is not stopped at ptrace_stop, some fields are set
+that are ignored everywhere except in ptrace_stop, making the
+userspace-visible behavior of PTRACE_KILL a no-op in those cases.
+
+As the usual rules are not obeyed, it is not clear what the
+consequences are of calling PTRACE_KILL on a running process.
+Presumably userspace does not do this, as it achieves nothing.
+
+Replace the implementation of PTRACE_KILL with a simple
+send_sig_info(SIGKILL) followed by a return 0. This changes the
+observable userspace behavior only in that PTRACE_KILL on a process
+not stopped in ptrace_stop will also kill it. As that has always
+been the intent of the code, this seems like a reasonable change.
+
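+For illustration only (not part of the upstream change), a minimal
+userspace sketch of the behavior difference: the tracee below is
+running, not stopped in ptrace_stop, so before this patch the
+PTRACE_KILL is a no-op and waitpid() blocks forever; with this patch
+the tracee is killed by SIGKILL. The sleep() is only a crude way to
+let the child call PTRACE_TRACEME first.
+
+  #include <stdio.h>
+  #include <signal.h>
+  #include <unistd.h>
+  #include <sys/ptrace.h>
+  #include <sys/types.h>
+  #include <sys/wait.h>
+
+  int main(void)
+  {
+          pid_t pid = fork();
+
+          if (pid == 0) {
+                  /* tracee: become traced by the parent, then keep running */
+                  ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+                  for (;;)
+                          pause();
+          }
+
+          sleep(1);
+          ptrace(PTRACE_KILL, pid, NULL, NULL);
+
+          int status;
+          waitpid(pid, &status, 0);
+          if (WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
+                  printf("tracee killed by SIGKILL\n");
+          else
+                  printf("tracee not killed (status 0x%x)\n", status);
+          return 0;
+  }
+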
+Cc: stable@vger.kernel.org
+Reported-by: Al Viro <viro@zeniv.linux.org.uk>
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Tested-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lkml.kernel.org/r/20220505182645.497868-7-ebiederm@xmission.com
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/step.c | 3 +--
+ kernel/ptrace.c | 5 ++---
+ 2 files changed, 3 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kernel/step.c
++++ b/arch/x86/kernel/step.c
+@@ -180,8 +180,7 @@ void set_task_blockstep(struct task_stru
+ *
+ * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
+ * task is current or it can't be running, otherwise we can race
+- * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
+- * PTRACE_KILL is not safe.
++ * with __switch_to_xtra(). We rely on ptrace_freeze_traced().
+ */
+ local_irq_disable();
+ debugctl = get_debugctlmsr();
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -1238,9 +1238,8 @@ int ptrace_request(struct task_struct *c
+ return ptrace_resume(child, request, data);
+
+ case PTRACE_KILL:
+- if (child->exit_state) /* already dead */
+- return 0;
+- return ptrace_resume(child, request, SIGKILL);
++ send_sig_info(SIGKILL, SEND_SIG_NOINFO, child);
++ return 0;
+
+ #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
+ case PTRACE_GETREGSET:
--- /dev/null
+From c200e4bb44e80b343c09841e7caaaca0aac5e5fa Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 26 Apr 2022 16:30:17 -0500
+Subject: ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit c200e4bb44e80b343c09841e7caaaca0aac5e5fa upstream.
+
+User Mode Linux is the last user of the PT_DTRACE flag. Using the flag
+to indicate single stepping is a little confusing and, worse, changing
+tsk->ptrace without locking could potentially cause problems.
+
+So use a thread info flag with a better name instead of a flag in
+tsk->ptrace.
+
+Remove the definition of PT_DTRACE, as UML is its last user.
+
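+As background (illustration only, not part of the upstream change): the
+thread info flag helpers used here (set_tsk_thread_flag() and friends)
+are atomic bit operations, unlike the plain read-modify-write on
+tsk->ptrace that this patch removes. A rough userspace analogue of such
+helpers, assuming GCC/Clang __atomic builtins:
+
+  #include <stdio.h>
+  #include <stdbool.h>
+
+  /* stands in for the per-thread flag word (thread_info::flags) */
+  static unsigned long flags;
+
+  static void flag_set(int bit)
+  {
+          __atomic_fetch_or(&flags, 1UL << bit, __ATOMIC_RELAXED);
+  }
+
+  static void flag_clear(int bit)
+  {
+          __atomic_fetch_and(&flags, ~(1UL << bit), __ATOMIC_RELAXED);
+  }
+
+  static bool flag_test(int bit)
+  {
+          return __atomic_load_n(&flags, __ATOMIC_RELAXED) & (1UL << bit);
+  }
+
+  int main(void)
+  {
+          flag_set(10);           /* TIF_SINGLESTEP is bit 10 in this patch */
+          printf("singlestep: %d\n", flag_test(10));
+          flag_clear(10);
+          printf("singlestep: %d\n", flag_test(10));
+          return 0;
+  }
+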
+Cc: stable@vger.kernel.org
+Acked-by: Johannes Berg <johannes@sipsolutions.net>
+Tested-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lkml.kernel.org/r/20220505182645.497868-3-ebiederm@xmission.com
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/um/include/asm/thread_info.h | 2 ++
+ arch/um/kernel/exec.c | 2 +-
+ arch/um/kernel/process.c | 2 +-
+ arch/um/kernel/ptrace.c | 8 ++++----
+ arch/um/kernel/signal.c | 4 ++--
+ include/linux/ptrace.h | 1 -
+ 6 files changed, 10 insertions(+), 9 deletions(-)
+
+--- a/arch/um/include/asm/thread_info.h
++++ b/arch/um/include/asm/thread_info.h
+@@ -64,6 +64,7 @@ static inline struct thread_info *curren
+ #define TIF_RESTORE_SIGMASK 7
+ #define TIF_NOTIFY_RESUME 8
+ #define TIF_SECCOMP 9 /* secure computing */
++#define TIF_SINGLESTEP 10 /* single stepping userspace */
+
+ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+@@ -72,5 +73,6 @@ static inline struct thread_info *curren
+ #define _TIF_MEMDIE (1 << TIF_MEMDIE)
+ #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+ #define _TIF_SECCOMP (1 << TIF_SECCOMP)
++#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+
+ #endif
+--- a/arch/um/kernel/exec.c
++++ b/arch/um/kernel/exec.c
+@@ -42,7 +42,7 @@ void start_thread(struct pt_regs *regs,
+ {
+ PT_REGS_IP(regs) = eip;
+ PT_REGS_SP(regs) = esp;
+- current->ptrace &= ~PT_DTRACE;
++ clear_thread_flag(TIF_SINGLESTEP);
+ #ifdef SUBARCH_EXECVE1
+ SUBARCH_EXECVE1(regs->regs);
+ #endif
+--- a/arch/um/kernel/process.c
++++ b/arch/um/kernel/process.c
+@@ -339,7 +339,7 @@ int singlestepping(void * t)
+ {
+ struct task_struct *task = t ? t : current;
+
+- if (!(task->ptrace & PT_DTRACE))
++ if (!test_thread_flag(TIF_SINGLESTEP))
+ return 0;
+
+ if (task->thread.singlestep_syscall)
+--- a/arch/um/kernel/ptrace.c
++++ b/arch/um/kernel/ptrace.c
+@@ -12,7 +12,7 @@
+
+ void user_enable_single_step(struct task_struct *child)
+ {
+- child->ptrace |= PT_DTRACE;
++ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child->thread.singlestep_syscall = 0;
+
+ #ifdef SUBARCH_SET_SINGLESTEPPING
+@@ -22,7 +22,7 @@ void user_enable_single_step(struct task
+
+ void user_disable_single_step(struct task_struct *child)
+ {
+- child->ptrace &= ~PT_DTRACE;
++ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child->thread.singlestep_syscall = 0;
+
+ #ifdef SUBARCH_SET_SINGLESTEPPING
+@@ -121,7 +121,7 @@ static void send_sigtrap(struct uml_pt_r
+ }
+
+ /*
+- * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
++ * XXX Check TIF_SINGLESTEP for singlestepping check and
+ * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
+ */
+ int syscall_trace_enter(struct pt_regs *regs)
+@@ -145,7 +145,7 @@ void syscall_trace_leave(struct pt_regs
+ audit_syscall_exit(regs);
+
+ /* Fake a debug trap */
+- if (ptraced & PT_DTRACE)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ send_sigtrap(&regs->regs, 0);
+
+ if (!test_thread_flag(TIF_SYSCALL_TRACE))
+--- a/arch/um/kernel/signal.c
++++ b/arch/um/kernel/signal.c
+@@ -53,7 +53,7 @@ static void handle_signal(struct ksignal
+ unsigned long sp;
+ int err;
+
+- if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
++ if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED))
+ singlestep = 1;
+
+ /* Did we come from a system call? */
+@@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs)
+ * on the host. The tracing thread will check this flag and
+ * PTRACE_SYSCALL if necessary.
+ */
+- if (current->ptrace & PT_DTRACE)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ current->thread.singlestep_syscall =
+ is_syscall(PT_REGS_IP(&current->thread.regs));
+
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_
+
+ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */
+ #define PT_PTRACED 0x00000001
+-#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
+
+ #define PT_OPT_FLAG_SHIFT 3
+ /* PT_TRACE_* event enable flags */
--- /dev/null
+From 4a3d2717d140401df7501a95e454180831a0c5af Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 26 Apr 2022 16:45:37 -0500
+Subject: ptrace/xtensa: Replace PT_SINGLESTEP with TIF_SINGLESTEP
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+commit 4a3d2717d140401df7501a95e454180831a0c5af upstream.
+
+xtensa is the last user of the PT_SINGLESTEP flag. Changing tsk->ptrace
+in user_enable_single_step and user_disable_single_step without locking
+could potentially cause problems.
+
+So use a thread info flag instead of a flag in tsk->ptrace. Use
+TIF_SINGLESTEP, which xtensa already had defined but left unused.
+
+Remove the definitions of PT_SINGLESTEP and PT_BLOCKSTEP as they have
+no more users.
+
+Cc: stable@vger.kernel.org
+Acked-by: Max Filippov <jcmvbkbc@gmail.com>
+Tested-by: Kees Cook <keescook@chromium.org>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lkml.kernel.org/r/20220505182645.497868-4-ebiederm@xmission.com
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/xtensa/kernel/ptrace.c | 4 ++--
+ arch/xtensa/kernel/signal.c | 4 ++--
+ include/linux/ptrace.h | 6 ------
+ 3 files changed, 4 insertions(+), 10 deletions(-)
+
+--- a/arch/xtensa/kernel/ptrace.c
++++ b/arch/xtensa/kernel/ptrace.c
+@@ -226,12 +226,12 @@ const struct user_regset_view *task_user
+
+ void user_enable_single_step(struct task_struct *child)
+ {
+- child->ptrace |= PT_SINGLESTEP;
++ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ }
+
+ void user_disable_single_step(struct task_struct *child)
+ {
+- child->ptrace &= ~PT_SINGLESTEP;
++ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ }
+
+ /*
+--- a/arch/xtensa/kernel/signal.c
++++ b/arch/xtensa/kernel/signal.c
+@@ -465,7 +465,7 @@ static void do_signal(struct pt_regs *re
+ /* Set up the stack frame */
+ ret = setup_frame(&ksig, sigmask_to_save(), regs);
+ signal_setup_done(ret, &ksig, 0);
+- if (current->ptrace & PT_SINGLESTEP)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ task_pt_regs(current)->icountlevel = 1;
+
+ return;
+@@ -491,7 +491,7 @@ static void do_signal(struct pt_regs *re
+ /* If there's no signal to deliver, we just restore the saved mask. */
+ restore_saved_sigmask();
+
+- if (current->ptrace & PT_SINGLESTEP)
++ if (test_thread_flag(TIF_SINGLESTEP))
+ task_pt_regs(current)->icountlevel = 1;
+ return;
+ }
+--- a/include/linux/ptrace.h
++++ b/include/linux/ptrace.h
+@@ -46,12 +46,6 @@ extern int ptrace_access_vm(struct task_
+ #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
+ #define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
+
+-/* single stepping state bits (used on ARM and PA-RISC) */
+-#define PT_SINGLESTEP_BIT 31
+-#define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT)
+-#define PT_BLOCKSTEP_BIT 30
+-#define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT)
+-
+ extern long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data);
+ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
cifs-fix-potential-double-free-during-failed-mount.patch
cifs-when-extending-a-file-with-falloc-we-should-make-files-not-sparse.patch
xhci-allow-host-runtime-pm-as-default-for-intel-alder-lake-n-xhci.patch
+platform-x86-intel-hid-fix-_dsm-function-index-handling.patch
+x86-mce-amd-fix-memory-leak-when-threshold_create_bank-fails.patch
+perf-x86-intel-fix-event-constraints-for-icl.patch
+x86-kexec-fix-memory-leak-of-elf-header-buffer.patch
+x86-sgx-set-active-memcg-prior-to-shmem-allocation.patch
+ptrace-um-replace-pt_dtrace-with-tif_singlestep.patch
+ptrace-xtensa-replace-pt_singlestep-with-tif_singlestep.patch
+ptrace-reimplement-ptrace_kill-by-always-sending-sigkill.patch
--- /dev/null
+From b3e34a47f98974d0844444c5121aaff123004e57 Mon Sep 17 00:00:00 2001
+From: Baoquan He <bhe@redhat.com>
+Date: Wed, 23 Feb 2022 19:32:24 +0800
+Subject: x86/kexec: fix memory leak of elf header buffer
+
+From: Baoquan He <bhe@redhat.com>
+
+commit b3e34a47f98974d0844444c5121aaff123004e57 upstream.
+
+This is reported by the kmemleak detector:
+
+unreferenced object 0xffffc900002a9000 (size 4096):
+ comm "kexec", pid 14950, jiffies 4295110793 (age 373.951s)
+ hex dump (first 32 bytes):
+ 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 .ELF............
+ 04 00 3e 00 01 00 00 00 00 00 00 00 00 00 00 00 ..>.............
+ backtrace:
+ [<0000000016a8ef9f>] __vmalloc_node_range+0x101/0x170
+ [<000000002b66b6c0>] __vmalloc_node+0xb4/0x160
+ [<00000000ad40107d>] crash_prepare_elf64_headers+0x8e/0xcd0
+ [<0000000019afff23>] crash_load_segments+0x260/0x470
+ [<0000000019ebe95c>] bzImage64_load+0x814/0xad0
+ [<0000000093e16b05>] arch_kexec_kernel_image_load+0x1be/0x2a0
+ [<000000009ef2fc88>] kimage_file_alloc_init+0x2ec/0x5a0
+ [<0000000038f5a97a>] __do_sys_kexec_file_load+0x28d/0x530
+ [<0000000087c19992>] do_syscall_64+0x3b/0x90
+ [<0000000066e063a4>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+In crash_prepare_elf64_headers(), a buffer is allocated via vmalloc() to
+store the ELF headers, but it is not freed back to the system when the
+kdump kernel is reloaded or unloaded, so memory is leaked. Fix this by
+introducing the x86-specific function arch_kimage_file_post_load_cleanup()
+and freeing the buffer there.
+
+Also remove the incorrect ELF header buffer freeing code. Before the
+arch-specific kexec_file loading function is called, the image instance
+has just been initialized, so 'image->elf_headers' must be NULL; it
+makes no sense to free the ELF header buffer in that place.
+
+Three different people have reported three bugs about the memory leak on
+x86_64 inside Red Hat.
+
+Link: https://lkml.kernel.org/r/20220223113225.63106-2-bhe@redhat.com
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Acked-by: Dave Young <dyoung@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/machine_kexec_64.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/machine_kexec_64.c
++++ b/arch/x86/kernel/machine_kexec_64.c
+@@ -373,9 +373,6 @@ void machine_kexec(struct kimage *image)
+ #ifdef CONFIG_KEXEC_FILE
+ void *arch_kexec_kernel_image_load(struct kimage *image)
+ {
+- vfree(image->elf_headers);
+- image->elf_headers = NULL;
+-
+ if (!image->fops || !image->fops->load)
+ return ERR_PTR(-ENOEXEC);
+
+@@ -511,6 +508,15 @@ overflow:
+ (int)ELF64_R_TYPE(rel[i].r_info), value);
+ return -ENOEXEC;
+ }
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image)
++{
++ vfree(image->elf_headers);
++ image->elf_headers = NULL;
++ image->elf_headers_sz = 0;
++
++ return kexec_image_post_load_cleanup_default(image);
++}
+ #endif /* CONFIG_KEXEC_FILE */
+
+ static int
--- /dev/null
+From e5f28623ceb103e13fc3d7bd45edf9818b227fd0 Mon Sep 17 00:00:00 2001
+From: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+Date: Tue, 29 Mar 2022 17:47:05 +0700
+Subject: x86/MCE/AMD: Fix memory leak when threshold_create_bank() fails
+
+From: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+
+commit e5f28623ceb103e13fc3d7bd45edf9818b227fd0 upstream.
+
+In mce_threshold_create_device(), if threshold_create_bank() fails, the
+previously allocated threshold banks array @bp will be leaked because
+the call to mce_threshold_remove_device() will not free it.
+
+This happens because mce_threshold_remove_device() fetches the pointer
+through the threshold_banks per-CPU variable but bp is written there
+only after the bank creation is successful, and not before, when
+threshold_create_bank() fails.
+
+Add a helper which unwinds all the bank creation work previously done
+and pass into it the previously allocated threshold banks array for
+freeing.
+
+ [ bp: Massage. ]
+
+Fixes: 6458de97fc15 ("x86/mce/amd: Straighten CPU hotplug path")
+Co-developed-by: Alviro Iskandar Setiawan <alviro.iskandar@gnuweeb.org>
+Signed-off-by: Alviro Iskandar Setiawan <alviro.iskandar@gnuweeb.org>
+Co-developed-by: Yazen Ghannam <yazen.ghannam@amd.com>
+Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
+Signed-off-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220329104705.65256-3-ammarfaizi2@gnuweeb.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/mce/amd.c | 32 +++++++++++++++++++-------------
+ 1 file changed, 19 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/kernel/cpu/mce/amd.c
++++ b/arch/x86/kernel/cpu/mce/amd.c
+@@ -1470,10 +1470,23 @@ out_free:
+ kfree(bank);
+ }
+
++static void __threshold_remove_device(struct threshold_bank **bp)
++{
++ unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
++
++ for (bank = 0; bank < numbanks; bank++) {
++ if (!bp[bank])
++ continue;
++
++ threshold_remove_bank(bp[bank]);
++ bp[bank] = NULL;
++ }
++ kfree(bp);
++}
++
+ int mce_threshold_remove_device(unsigned int cpu)
+ {
+ struct threshold_bank **bp = this_cpu_read(threshold_banks);
+- unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
+
+ if (!bp)
+ return 0;
+@@ -1484,13 +1497,7 @@ int mce_threshold_remove_device(unsigned
+ */
+ this_cpu_write(threshold_banks, NULL);
+
+- for (bank = 0; bank < numbanks; bank++) {
+- if (bp[bank]) {
+- threshold_remove_bank(bp[bank]);
+- bp[bank] = NULL;
+- }
+- }
+- kfree(bp);
++ __threshold_remove_device(bp);
+ return 0;
+ }
+
+@@ -1527,15 +1534,14 @@ int mce_threshold_create_device(unsigned
+ if (!(this_cpu_read(bank_map) & (1 << bank)))
+ continue;
+ err = threshold_create_bank(bp, cpu, bank);
+- if (err)
+- goto out_err;
++ if (err) {
++ __threshold_remove_device(bp);
++ return err;
++ }
+ }
+ this_cpu_write(threshold_banks, bp);
+
+ if (thresholding_irq_en)
+ mce_threshold_vector = amd_threshold_interrupt;
+ return 0;
+-out_err:
+- mce_threshold_remove_device(cpu);
+- return err;
+ }
--- /dev/null
+From 0c9782e204d3cc5625b9e8bf4e8625d38dfe0139 Mon Sep 17 00:00:00 2001
+From: Kristen Carlson Accardi <kristen@linux.intel.com>
+Date: Fri, 20 May 2022 10:42:47 -0700
+Subject: x86/sgx: Set active memcg prior to shmem allocation
+
+From: Kristen Carlson Accardi <kristen@linux.intel.com>
+
+commit 0c9782e204d3cc5625b9e8bf4e8625d38dfe0139 upstream.
+
+When the system runs out of enclave memory, SGX can reclaim EPC pages
+by swapping to normal RAM. These backing pages are allocated via a
+per-enclave shared memory area. Since SGX allows unlimited overcommit
+on EPC memory, the reclaimer thread can allocate a large
+number of backing RAM pages in response to EPC memory pressure.
+
+When the shared memory backing RAM allocation occurs in the reclaimer
+thread's context, the shared memory is charged to the root memory
+control group, and the shmem usage of the enclave is not properly
+accounted for, making cgroups ineffective at limiting the amount of
+RAM an enclave can consume.
+
+For example, when using a cgroup to launch a set of test enclaves, the
+kernel does not properly account for 50%-75% of shmem page allocations
+on average. In the worst case, when nearly all allocations occur in the
+reclaimer thread, the kernel charges less than one percent of the shmem
+used by the enclave to the correct cgroup.
+
+SGX stores a list of mm_structs that are associated with
+an enclave. Pick one of them during reclaim and charge that
+mm's memcg with the shmem allocation. The one that gets picked
+is arbitrary, but this list almost always only has one mm. The
+cases where there is more than one mm with different memcgs
+are not worth considering.
+
+Create a new function, sgx_encl_alloc_backing(), which is used whenever
+a new backing storage page needs to be allocated. Previously the same
+function was used both for page allocation and for retrieving a
+previously allocated page. Prior to backing page allocation, if there
+is an mm_struct associated with the enclave that is requesting the
+allocation, that mm's memcg is set as the active memory control group.
+
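+The core of the charging pattern, condensed from the
+sgx_encl_alloc_backing() hunk below:
+
+  struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
+  struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
+
+  /* shmem pages allocated here are charged to encl_memcg */
+  ret = sgx_encl_get_backing(encl, page_index, backing);
+
+  set_active_memcg(memcg);
+  mem_cgroup_put(encl_memcg);
+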
+[ dhansen: - fix merge conflict with ELDU fixes
+ - check against actual ksgxd_tsk, not ->mm ]
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
+Link: https://lkml.kernel.org/r/20220520174248.4918-1-kristen@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/sgx/encl.c | 105 ++++++++++++++++++++++++++++++++++++++++-
+ arch/x86/kernel/cpu/sgx/encl.h | 7 +-
+ arch/x86/kernel/cpu/sgx/main.c | 9 ++-
+ 3 files changed, 115 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kernel/cpu/sgx/encl.c
++++ b/arch/x86/kernel/cpu/sgx/encl.c
+@@ -152,7 +152,7 @@ static int __sgx_encl_eldu(struct sgx_en
+
+ page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+
+- ret = sgx_encl_get_backing(encl, page_index, &b);
++ ret = sgx_encl_lookup_backing(encl, page_index, &b);
+ if (ret)
+ return ret;
+
+@@ -718,7 +718,7 @@ static struct page *sgx_encl_get_backing
+ * 0 on success,
+ * -errno otherwise.
+ */
+-int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
++static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+ struct sgx_backing *backing)
+ {
+ pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+@@ -743,6 +743,107 @@ int sgx_encl_get_backing(struct sgx_encl
+ return 0;
+ }
+
++/*
++ * When called from ksgxd, returns the mem_cgroup of a struct mm stored
++ * in the enclave's mm_list. When not called from ksgxd, just returns
++ * the mem_cgroup of the current task.
++ */
++static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl)
++{
++ struct mem_cgroup *memcg = NULL;
++ struct sgx_encl_mm *encl_mm;
++ int idx;
++
++ /*
++ * If called from normal task context, return the mem_cgroup
++ * of the current task's mm. The remainder of the handling is for
++ * ksgxd.
++ */
++ if (!current_is_ksgxd())
++ return get_mem_cgroup_from_mm(current->mm);
++
++ /*
++ * Search the enclave's mm_list to find an mm associated with
++ * this enclave to charge the allocation to.
++ */
++ idx = srcu_read_lock(&encl->srcu);
++
++ list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
++ if (!mmget_not_zero(encl_mm->mm))
++ continue;
++
++ memcg = get_mem_cgroup_from_mm(encl_mm->mm);
++
++ mmput_async(encl_mm->mm);
++
++ break;
++ }
++
++ srcu_read_unlock(&encl->srcu, idx);
++
++ /*
++ * In the rare case that there isn't an mm associated with
++ * the enclave, set memcg to the current active mem_cgroup.
++ * This will be the root mem_cgroup if there is no active
++ * mem_cgroup.
++ */
++ if (!memcg)
++ return get_mem_cgroup_from_mm(NULL);
++
++ return memcg;
++}
++
++/**
++ * sgx_encl_alloc_backing() - allocate a new backing storage page
++ * @encl: an enclave pointer
++ * @page_index: enclave page index
++ * @backing: data for accessing backing storage for the page
++ *
++ * When called from ksgxd, sets the active memcg from one of the
++ * mms in the enclave's mm_list prior to any backing page allocation,
++ * in order to ensure that shmem page allocations are charged to the
++ * enclave.
++ *
++ * Return:
++ * 0 on success,
++ * -errno otherwise.
++ */
++int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
++ struct sgx_backing *backing)
++{
++ struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl);
++ struct mem_cgroup *memcg = set_active_memcg(encl_memcg);
++ int ret;
++
++ ret = sgx_encl_get_backing(encl, page_index, backing);
++
++ set_active_memcg(memcg);
++ mem_cgroup_put(encl_memcg);
++
++ return ret;
++}
++
++/**
++ * sgx_encl_lookup_backing() - retrieve an existing backing storage page
++ * @encl: an enclave pointer
++ * @page_index: enclave page index
++ * @backing: data for accessing backing storage for the page
++ *
++ * Retrieve a backing page for loading data back into an EPC page with ELDU.
++ * It is the caller's responsibility to ensure that it is appropriate to use
++ * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is
++ * not used correctly, this will cause an allocation which is not accounted for.
++ *
++ * Return:
++ * 0 on success,
++ * -errno otherwise.
++ */
++int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
++ struct sgx_backing *backing)
++{
++ return sgx_encl_get_backing(encl, page_index, backing);
++}
++
+ /**
+ * sgx_encl_put_backing() - Unpin the backing storage
+ * @backing: data for accessing backing storage for the page
+--- a/arch/x86/kernel/cpu/sgx/encl.h
++++ b/arch/x86/kernel/cpu/sgx/encl.h
+@@ -103,10 +103,13 @@ static inline int sgx_encl_find(struct m
+ int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
+ unsigned long end, unsigned long vm_flags);
+
++bool current_is_ksgxd(void);
+ void sgx_encl_release(struct kref *ref);
+ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm);
+-int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
+- struct sgx_backing *backing);
++int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index,
++ struct sgx_backing *backing);
++int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index,
++ struct sgx_backing *backing);
+ void sgx_encl_put_backing(struct sgx_backing *backing);
+ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
+ struct sgx_encl_page *page);
+--- a/arch/x86/kernel/cpu/sgx/main.c
++++ b/arch/x86/kernel/cpu/sgx/main.c
+@@ -292,7 +292,7 @@ static void sgx_reclaimer_write(struct s
+ sgx_encl_put_backing(backing);
+
+ if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) {
+- ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
++ ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size),
+ &secs_backing);
+ if (ret)
+ goto out;
+@@ -365,7 +365,7 @@ static void sgx_reclaim_pages(void)
+ page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base);
+
+ mutex_lock(&encl_page->encl->lock);
+- ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]);
++ ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]);
+ if (ret) {
+ mutex_unlock(&encl_page->encl->lock);
+ goto skip;
+@@ -462,6 +462,11 @@ static bool __init sgx_page_reclaimer_in
+ return true;
+ }
+
++bool current_is_ksgxd(void)
++{
++ return current == ksgxd_tsk;
++}
++
+ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
+ {
+ struct sgx_numa_node *node = &sgx_numa_nodes[nid];