--- /dev/null
+From a1140cb215fa13dcec06d12ba0c3ee105633b7c4 Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Tue, 23 Aug 2022 08:45:32 -0700
+Subject: seccomp: Move copy_seccomp() to no failure path.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit a1140cb215fa13dcec06d12ba0c3ee105633b7c4 upstream.
+
+Our syzbot instance reported memory leaks in do_seccomp() [0], similar
+to the report [1]. It shows that we miss freeing struct seccomp_filter
+and some objects included in it.
+
+We can reproduce the issue with the program below [2] which calls one
+seccomp() and two clone() syscalls.
+
+The first clone()d child exits earlier than its parent and sends a
+signal to kill it during the second clone(), more precisely before the
+fatal_signal_pending() test in copy_process(). When the parent receives
+the signal, it has to destroy the embryonic process and return -EINTR to
+user space. In the failure path, we have to call seccomp_filter_release()
+to decrement the filter's refcount.
+
+Initially, we called it in free_task() called from the failure path, but
+the commit 3a15fb6ed92c ("seccomp: release filter after task is fully
+dead") moved it to release_task() to notify user space as early as possible
+that the filter is no longer used.
+
+To keep the change and current seccomp refcount semantics, let's move
+copy_seccomp() just after the signal check and add a WARN_ON_ONCE() in
+free_task() for future debugging.
+
+[0]:
+unreferenced object 0xffff8880063add00 (size 256):
+ comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.914s)
+ hex dump (first 32 bytes):
+ 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................
+ ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................
+ backtrace:
+ do_seccomp (./include/linux/slab.h:600 ./include/linux/slab.h:733 kernel/seccomp.c:666 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991)
+ do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+unreferenced object 0xffffc90000035000 (size 4096):
+ comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.915s)
+ hex dump (first 32 bytes):
+ 01 00 00 00 00 00 00 00 00 00 00 00 05 00 00 00 ................
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ __vmalloc_node_range (mm/vmalloc.c:3226)
+ __vmalloc_node (mm/vmalloc.c:3261 (discriminator 4))
+ bpf_prog_alloc_no_stats (kernel/bpf/core.c:91)
+ bpf_prog_alloc (kernel/bpf/core.c:129)
+ bpf_prog_create_from_user (net/core/filter.c:1414)
+ do_seccomp (kernel/seccomp.c:671 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991)
+ do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+unreferenced object 0xffff888003fa1000 (size 1024):
+ comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.915s)
+ hex dump (first 32 bytes):
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ bpf_prog_alloc_no_stats (./include/linux/slab.h:600 ./include/linux/slab.h:733 kernel/bpf/core.c:95)
+ bpf_prog_alloc (kernel/bpf/core.c:129)
+ bpf_prog_create_from_user (net/core/filter.c:1414)
+ do_seccomp (kernel/seccomp.c:671 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991)
+ do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+unreferenced object 0xffff888006360240 (size 16):
+ comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.915s)
+ hex dump (first 16 bytes):
+ 01 00 37 00 76 65 72 6c e0 83 01 06 80 88 ff ff ..7.verl........
+ backtrace:
+ bpf_prog_store_orig_filter (net/core/filter.c:1137)
+ bpf_prog_create_from_user (net/core/filter.c:1428)
+ do_seccomp (kernel/seccomp.c:671 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991)
+ do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+unreferenced object 0xffff8880060183e0 (size 8):
+ comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.915s)
+ hex dump (first 8 bytes):
+ 06 00 00 00 00 00 ff 7f ........
+ backtrace:
+ kmemdup (mm/util.c:129)
+ bpf_prog_store_orig_filter (net/core/filter.c:1144)
+ bpf_prog_create_from_user (net/core/filter.c:1428)
+ do_seccomp (kernel/seccomp.c:671 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991)
+ do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+ entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+
+[1]: https://syzkaller.appspot.com/bug?id=2809bb0ac77ad9aa3f4afe42d6a610aba594a987
+
+[2]:
+#define _GNU_SOURCE
+#include <sched.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+
+void main(void)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog fprog = {
+ .len = sizeof(filter) / sizeof(filter[0]),
+ .filter = filter,
+ };
+ long i, pid;
+
+ syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &fprog);
+
+ for (i = 0; i < 2; i++) {
+ pid = syscall(__NR_clone, CLONE_NEWNET | SIGKILL, NULL, NULL, 0);
+ if (pid == 0)
+ return;
+ }
+}
+
+Fixes: 3a15fb6ed92c ("seccomp: release filter after task is fully dead")
+Reported-by: syzbot+ab17848fe269b573eb71@syzkaller.appspotmail.com
+Reported-by: Ayushman Dutta <ayudutta@amazon.com>
+Suggested-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20220823154532.82913-1-kuniyu@amazon.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/fork.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/fork.c b/kernel/fork.c
+index a5bc0c6a00fd..c6a289317e89 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -441,6 +441,9 @@ void put_task_stack(struct task_struct *tsk)
+
+ void free_task(struct task_struct *tsk)
+ {
++#ifdef CONFIG_SECCOMP
++ WARN_ON_ONCE(tsk->seccomp.filter);
++#endif
+ scs_release(tsk);
+
+ #ifndef CONFIG_THREAD_INFO_IN_TASK
+@@ -2248,12 +2251,6 @@ static __latent_entropy struct task_struct *copy_process(
+
+ spin_lock(¤t->sighand->siglock);
+
+- /*
+- * Copy seccomp details explicitly here, in case they were changed
+- * before holding sighand lock.
+- */
+- copy_seccomp(p);
+-
+ rseq_fork(p, clone_flags);
+
+ /* Don't start children in a dying pid namespace */
+@@ -2268,6 +2265,14 @@ static __latent_entropy struct task_struct *copy_process(
+ goto bad_fork_cancel_cgroup;
+ }
+
++ /* No more failure paths after this point. */
++
++ /*
++ * Copy seccomp details explicitly here, in case they were changed
++ * before holding sighand lock.
++ */
++ copy_seccomp(p);
++
+ init_task_pid_links(p);
+ if (likely(p->pid)) {
+ ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
+--
+2.30.2
+