From: Greg Kroah-Hartman Date: Thu, 27 Apr 2023 08:43:31 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v5.15.110~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5bdb561cc87c4c93bd4b283e5dcc5c68f33c543d;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: mm-mempolicy-fix-use-after-free-of-vma-iterator.patch mptcp-fix-accept-vs-worker-race.patch mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch --- diff --git a/queue-6.1/mm-mempolicy-fix-use-after-free-of-vma-iterator.patch b/queue-6.1/mm-mempolicy-fix-use-after-free-of-vma-iterator.patch new file mode 100644 index 00000000000..d2915b8b417 --- /dev/null +++ b/queue-6.1/mm-mempolicy-fix-use-after-free-of-vma-iterator.patch @@ -0,0 +1,211 @@ +From f4e9e0e69468583c2c6d9d5c7bfc975e292bf188 Mon Sep 17 00:00:00 2001 +From: "Liam R. Howlett" +Date: Mon, 10 Apr 2023 11:22:05 -0400 +Subject: mm/mempolicy: fix use-after-free of VMA iterator + +From: Liam R. Howlett + +commit f4e9e0e69468583c2c6d9d5c7bfc975e292bf188 upstream. + +set_mempolicy_home_node() iterates over a list of VMAs and calls +mbind_range() on each VMA, which also iterates over the singular list of +the VMA passed in and potentially splits the VMA. Since the VMA iterator +is not passed through, set_mempolicy_home_node() may now point to a stale +node in the VMA tree. This can result in a UAF as reported by syzbot. + +Avoid the stale maple tree node by passing the VMA iterator through to the +underlying call to split_vma(). + +mbind_range() is also overly complicated, since there are two calling +functions and one already handles iterating over the VMAs. Simplify +mbind_range() to only handle merging and splitting of the VMAs. + +Align the new loop in do_mbind() and existing loop in +set_mempolicy_home_node() to use the reduced mbind_range() function. This +allows for a single location of the range calculation and avoids +constantly looking up the previous VMA (since this is a loop over the +VMAs). + +Link: https://lore.kernel.org/linux-mm/000000000000c93feb05f87e24ad@google.com/ +Fixes: 66850be55e8e ("mm/mempolicy: use vma iterator & maple state instead of vma linked list") +Signed-off-by: Liam R. Howlett +Reported-by: syzbot+a7c1ec5b1d71ceaa5186@syzkaller.appspotmail.com + Link: https://lkml.kernel.org/r/20230410152205.2294819-1-Liam.Howlett@oracle.com +Tested-by: syzbot+a7c1ec5b1d71ceaa5186@syzkaller.appspotmail.com +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Liam R. Howlett +Signed-off-by: Greg Kroah-Hartman +--- + mm/mempolicy.c | 113 ++++++++++++++++++++++++++------------------------------- + 1 file changed, 52 insertions(+), 61 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -784,70 +784,56 @@ static int vma_replace_policy(struct vm_ + return err; + } + +-/* Step 2: apply policy to a range and do splits. 
*/ +-static int mbind_range(struct mm_struct *mm, unsigned long start, +- unsigned long end, struct mempolicy *new_pol) ++/* Split or merge the VMA (if required) and apply the new policy */ ++static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma, ++ struct vm_area_struct **prev, unsigned long start, ++ unsigned long end, struct mempolicy *new_pol) + { +- MA_STATE(mas, &mm->mm_mt, start, start); +- struct vm_area_struct *prev; +- struct vm_area_struct *vma; +- int err = 0; ++ struct vm_area_struct *merged; ++ unsigned long vmstart, vmend; + pgoff_t pgoff; ++ int err; + +- prev = mas_prev(&mas, 0); +- if (unlikely(!prev)) +- mas_set(&mas, start); ++ vmend = min(end, vma->vm_end); ++ if (start > vma->vm_start) { ++ *prev = vma; ++ vmstart = start; ++ } else { ++ vmstart = vma->vm_start; ++ } + +- vma = mas_find(&mas, end - 1); +- if (WARN_ON(!vma)) ++ if (mpol_equal(vma_policy(vma), new_pol)) + return 0; + +- if (start > vma->vm_start) +- prev = vma; ++ pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT); ++ merged = vma_merge(vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags, ++ vma->anon_vma, vma->vm_file, pgoff, new_pol, ++ vma->vm_userfaultfd_ctx, anon_vma_name(vma)); ++ if (merged) { ++ *prev = merged; ++ /* vma_merge() invalidated the mas */ ++ mas_pause(&vmi->mas); ++ return vma_replace_policy(merged, new_pol); ++ } + +- for (; vma; vma = mas_next(&mas, end - 1)) { +- unsigned long vmstart = max(start, vma->vm_start); +- unsigned long vmend = min(end, vma->vm_end); +- +- if (mpol_equal(vma_policy(vma), new_pol)) +- goto next; +- +- pgoff = vma->vm_pgoff + +- ((vmstart - vma->vm_start) >> PAGE_SHIFT); +- prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, +- vma->anon_vma, vma->vm_file, pgoff, +- new_pol, vma->vm_userfaultfd_ctx, +- anon_vma_name(vma)); +- if (prev) { +- /* vma_merge() invalidated the mas */ +- mas_pause(&mas); +- vma = prev; +- goto replace; +- } +- if (vma->vm_start != vmstart) { +- err = split_vma(vma->vm_mm, vma, vmstart, 1); +- if (err) +- goto out; +- /* split_vma() invalidated the mas */ +- mas_pause(&mas); +- } +- if (vma->vm_end != vmend) { +- err = split_vma(vma->vm_mm, vma, vmend, 0); +- if (err) +- goto out; +- /* split_vma() invalidated the mas */ +- mas_pause(&mas); +- } +-replace: +- err = vma_replace_policy(vma, new_pol); ++ if (vma->vm_start != vmstart) { ++ err = split_vma(vma->vm_mm, vma, vmstart, 1); + if (err) +- goto out; +-next: +- prev = vma; ++ return err; ++ /* split_vma() invalidated the mas */ ++ mas_pause(&vmi->mas); + } + +-out: +- return err; ++ if (vma->vm_end != vmend) { ++ err = split_vma(vma->vm_mm, vma, vmend, 0); ++ if (err) ++ return err; ++ /* split_vma() invalidated the mas */ ++ mas_pause(&vmi->mas); ++ } ++ ++ *prev = vma; ++ return vma_replace_policy(vma, new_pol); + } + + /* Set the process memory policy */ +@@ -1259,6 +1245,8 @@ static long do_mbind(unsigned long start + nodemask_t *nmask, unsigned long flags) + { + struct mm_struct *mm = current->mm; ++ struct vm_area_struct *vma, *prev; ++ struct vma_iterator vmi; + struct mempolicy *new; + unsigned long end; + int err; +@@ -1328,7 +1316,13 @@ static long do_mbind(unsigned long start + goto up_out; + } + +- err = mbind_range(mm, start, end, new); ++ vma_iter_init(&vmi, mm, start); ++ prev = vma_prev(&vmi); ++ for_each_vma_range(vmi, vma, end) { ++ err = mbind_range(&vmi, vma, &prev, start, end, new); ++ if (err) ++ break; ++ } + + if (!err) { + int nr_failed = 0; +@@ -1489,10 +1483,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, + 
unsigned long, home_node, unsigned long, flags) + { + struct mm_struct *mm = current->mm; +- struct vm_area_struct *vma; ++ struct vm_area_struct *vma, *prev; + struct mempolicy *new; +- unsigned long vmstart; +- unsigned long vmend; + unsigned long end; + int err = -ENOENT; + VMA_ITERATOR(vmi, mm, start); +@@ -1521,9 +1513,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, + if (end == start) + return 0; + mmap_write_lock(mm); ++ prev = vma_prev(&vmi); + for_each_vma_range(vmi, vma, end) { +- vmstart = max(start, vma->vm_start); +- vmend = min(end, vma->vm_end); + new = mpol_dup(vma_policy(vma)); + if (IS_ERR(new)) { + err = PTR_ERR(new); +@@ -1547,7 +1538,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, + } + + new->home_node = home_node; +- err = mbind_range(mm, vmstart, vmend, new); ++ err = mbind_range(&vmi, vma, &prev, start, end, new); + mpol_put(new); + if (err) + break; diff --git a/queue-6.1/mptcp-fix-accept-vs-worker-race.patch b/queue-6.1/mptcp-fix-accept-vs-worker-race.patch new file mode 100644 index 00000000000..b91d499f619 --- /dev/null +++ b/queue-6.1/mptcp-fix-accept-vs-worker-race.patch @@ -0,0 +1,283 @@ +From 63740448a32eb662e05894425b47bcc5814136f4 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 17 Apr 2023 16:00:41 +0200 +Subject: mptcp: fix accept vs worker race + +From: Paolo Abeni + +commit 63740448a32eb662e05894425b47bcc5814136f4 upstream. + +The mptcp worker and mptcp_accept() can race, as reported by Christoph: + +refcount_t: addition on 0; use-after-free. +WARNING: CPU: 1 PID: 14351 at lib/refcount.c:25 refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25 +Modules linked in: +CPU: 1 PID: 14351 Comm: syz-executor.2 Not tainted 6.3.0-rc1-gde5e8fd0123c #11 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 +RIP: 0010:refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25 +Code: 02 31 ff 89 de e8 1b f0 a7 ff 84 db 0f 85 6e ff ff ff e8 3e f5 a7 ff 48 c7 c7 d8 c7 34 83 c6 05 6d 2d 0f 02 01 e8 cb 3d 90 ff <0f> 0b e9 4f ff ff ff e8 1f f5 a7 ff 0f b6 1d 54 2d 0f 02 31 ff 89 +RSP: 0018:ffffc90000a47bf8 EFLAGS: 00010282 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 +RDX: ffff88802eae98c0 RSI: ffffffff81097d4f RDI: 0000000000000001 +RBP: ffff88802e712180 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000001 R11: ffff88802eaea148 R12: ffff88802e712100 +R13: ffff88802e712a88 R14: ffff888005cb93a8 R15: ffff88802e712a88 +FS: 0000000000000000(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f277fd89120 CR3: 0000000035486002 CR4: 0000000000370ee0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + __refcount_add include/linux/refcount.h:199 [inline] + __refcount_inc include/linux/refcount.h:250 [inline] + refcount_inc include/linux/refcount.h:267 [inline] + sock_hold include/net/sock.h:775 [inline] + __mptcp_close+0x4c6/0x4d0 net/mptcp/protocol.c:3051 + mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072 + inet_release+0x56/0xa0 net/ipv4/af_inet.c:429 + __sock_release+0x51/0xf0 net/socket.c:653 + sock_close+0x18/0x20 net/socket.c:1395 + __fput+0x113/0x430 fs/file_table.c:321 + task_work_run+0x96/0x100 kernel/task_work.c:179 + exit_task_work include/linux/task_work.h:38 [inline] + do_exit+0x4fc/0x10c0 kernel/exit.c:869 + do_group_exit+0x51/0xf0 kernel/exit.c:1019 + get_signal+0x12b0/0x1390 kernel/signal.c:2859 + arch_do_signal_or_restart+0x25/0x260 
arch/x86/kernel/signal.c:306 + exit_to_user_mode_loop kernel/entry/common.c:168 [inline] + exit_to_user_mode_prepare+0x131/0x1a0 kernel/entry/common.c:203 + __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] + syscall_exit_to_user_mode+0x19/0x40 kernel/entry/common.c:296 + do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86 + entry_SYSCALL_64_after_hwframe+0x72/0xdc +RIP: 0033:0x7fec4b4926a9 +Code: Unable to access opcode bytes at 0x7fec4b49267f. +RSP: 002b:00007fec49f9dd78 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca +RAX: fffffffffffffe00 RBX: 00000000006bc058 RCX: 00007fec4b4926a9 +RDX: 0000000000000000 RSI: 0000000000000080 RDI: 00000000006bc058 +RBP: 00000000006bc050 R08: 00000000007df998 R09: 00000000007df998 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c +R13: fffffffffffffea8 R14: 000000000000000b R15: 000000000001fe40 + + +The root cause is that the worker can force fallback to TCP the first +mptcp subflow, actually deleting the unaccepted msk socket. + +We can explicitly prevent the race delaying the unaccepted msk deletion +at listener shutdown time. In case the closed subflow is later accepted, +just drop the mptcp context and let the user-space deal with the +paired mptcp socket. + +Fixes: b6985b9b8295 ("mptcp: use the workqueue to destroy unaccepted sockets") +Cc: stable@vger.kernel.org +Reported-by: Christoph Paasch +Link: https://github.com/multipath-tcp/mptcp_net-next/issues/375 +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Tested-by: Christoph Paasch +Signed-off-by: Matthieu Baerts +Signed-off-by: David S. Miller +Signed-off-by: Matthieu Baerts +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 68 +++++++++++++++++++++++++++++++++------------------ + net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 22 +++++++++------- + 3 files changed, 58 insertions(+), 33 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2330,7 +2330,26 @@ static void __mptcp_close_ssk(struct soc + unsigned int flags) + { + struct mptcp_sock *msk = mptcp_sk(sk); +- bool need_push, dispose_it; ++ bool dispose_it, need_push = false; ++ ++ /* If the first subflow moved to a close state before accept, e.g. due ++ * to an incoming reset, mptcp either: ++ * - if either the subflow or the msk are dead, destroy the context ++ * (the subflow socket is deleted by inet_child_forget) and the msk ++ * - otherwise do nothing at the moment and take action at accept and/or ++ * listener shutdown - user-space must be able to accept() the closed ++ * socket. ++ */ ++ if (msk->in_accept_queue && msk->first == ssk) { ++ if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) ++ return; ++ ++ /* ensure later check in mptcp_worker() will dispose the msk */ ++ sock_set_flag(sk, SOCK_DEAD); ++ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); ++ mptcp_subflow_drop_ctx(ssk); ++ goto out_release; ++ } + + dispose_it = !msk->subflow || ssk != msk->subflow->sk; + if (dispose_it) +@@ -2366,18 +2385,6 @@ static void __mptcp_close_ssk(struct soc + if (!inet_csk(ssk)->icsk_ulp_ops) { + WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD)); + kfree_rcu(subflow, rcu); +- } else if (msk->in_accept_queue && msk->first == ssk) { +- /* if the first subflow moved to a close state, e.g. due to +- * incoming reset and we reach here before inet_child_forget() +- * the TCP stack could later try to close it via +- * inet_csk_listen_stop(), or deliver it to the user space via +- * accept(). 
+- * We can't delete the subflow - or risk a double free - nor let +- * the msk survive - or will be leaked in the non accept scenario: +- * fallback and let TCP cope with the subflow cleanup. +- */ +- WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD)); +- mptcp_subflow_drop_ctx(ssk); + } else { + /* otherwise tcp will dispose of the ssk and subflow ctx */ + if (ssk->sk_state == TCP_LISTEN) { +@@ -2391,6 +2398,8 @@ static void __mptcp_close_ssk(struct soc + /* close acquired an extra ref */ + __sock_put(ssk); + } ++ ++out_release: + release_sock(ssk); + + sock_put(ssk); +@@ -2445,21 +2454,14 @@ static void __mptcp_close_subflow(struct + mptcp_close_ssk(sk, ssk, subflow); + } + +- /* if the MPC subflow has been closed before the msk is accepted, +- * msk will never be accept-ed, close it now +- */ +- if (!msk->first && msk->in_accept_queue) { +- sock_set_flag(sk, SOCK_DEAD); +- inet_sk_state_store(sk, TCP_CLOSE); +- } + } + +-static bool mptcp_check_close_timeout(const struct sock *sk) ++static bool mptcp_should_close(const struct sock *sk) + { + s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; + struct mptcp_subflow_context *subflow; + +- if (delta >= TCP_TIMEWAIT_LEN) ++ if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) + return true; + + /* if all subflows are in closed status don't bother with additional +@@ -2667,7 +2669,7 @@ static void mptcp_worker(struct work_str + * even if it is orphaned and in FIN_WAIT2 state + */ + if (sock_flag(sk, SOCK_DEAD)) { +- if (mptcp_check_close_timeout(sk)) { ++ if (mptcp_should_close(sk)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + } +@@ -2912,6 +2914,14 @@ static void __mptcp_destroy_sock(struct + sock_put(sk); + } + ++void __mptcp_unaccepted_force_close(struct sock *sk) ++{ ++ sock_set_flag(sk, SOCK_DEAD); ++ inet_sk_state_store(sk, TCP_CLOSE); ++ mptcp_do_fastclose(sk); ++ __mptcp_destroy_sock(sk); ++} ++ + static __poll_t mptcp_check_readable(struct mptcp_sock *msk) + { + /* Concurrent splices from sk_receive_queue into receive_queue will +@@ -3759,6 +3769,18 @@ static int mptcp_stream_accept(struct so + if (!ssk->sk_socket) + mptcp_sock_graft(ssk, newsock); + } ++ ++ /* Do late cleanup for the first subflow as necessary. Also ++ * deal with bad peers not doing a complete shutdown. 
++ */ ++ if (msk->first && ++ unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) { ++ __mptcp_close_ssk(newsk, msk->first, ++ mptcp_subflow_ctx(msk->first), 0); ++ if (unlikely(list_empty(&msk->conn_list))) ++ inet_sk_state_store(newsk, TCP_CLOSE); ++ } ++ + release_sock(newsk); + } + +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -620,6 +620,7 @@ void mptcp_sock_graft(struct sock *sk, s + struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); + bool __mptcp_close(struct sock *sk, long timeout); + void mptcp_cancel_work(struct sock *sk); ++void __mptcp_unaccepted_force_close(struct sock *sk); + + bool mptcp_addresses_equal(const struct mptcp_addr_info *a, + const struct mptcp_addr_info *b, bool use_port); +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -661,9 +661,12 @@ void mptcp_subflow_drop_ctx(struct sock + if (!ctx) + return; + +- subflow_ulp_fallback(ssk, ctx); +- if (ctx->conn) +- sock_put(ctx->conn); ++ list_del(&mptcp_subflow_ctx(ssk)->node); ++ if (inet_csk(ssk)->icsk_ulp_ops) { ++ subflow_ulp_fallback(ssk, ctx); ++ if (ctx->conn) ++ sock_put(ctx->conn); ++ } + + kfree_rcu(ctx, rcu); + } +@@ -1763,6 +1766,7 @@ void mptcp_subflow_queue_clean(struct so + struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; + struct mptcp_sock *msk, *next, *head = NULL; + struct request_sock *req; ++ struct sock *sk; + + /* build a list of all unaccepted mptcp sockets */ + spin_lock_bh(&queue->rskq_lock); +@@ -1778,11 +1782,12 @@ void mptcp_subflow_queue_clean(struct so + continue; + + /* skip if already in list */ +- msk = mptcp_sk(subflow->conn); ++ sk = subflow->conn; ++ msk = mptcp_sk(sk); + if (msk->dl_next || msk == head) + continue; + +- sock_hold(subflow->conn); ++ sock_hold(sk); + msk->dl_next = head; + head = msk; + } +@@ -1796,16 +1801,13 @@ void mptcp_subflow_queue_clean(struct so + release_sock(listener_ssk); + + for (msk = head; msk; msk = next) { +- struct sock *sk = (struct sock *)msk; ++ sk = (struct sock *)msk; + + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + next = msk->dl_next; + msk->dl_next = NULL; + +- /* prevent the stack from later re-schedule the worker for +- * this socket +- */ +- inet_sk_state_store(sk, TCP_CLOSE); ++ __mptcp_unaccepted_force_close(sk); + release_sock(sk); + + /* lockdep will report a false positive ABBA deadlock diff --git a/queue-6.1/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch b/queue-6.1/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch new file mode 100644 index 00000000000..cd124fb5de0 --- /dev/null +++ b/queue-6.1/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch @@ -0,0 +1,192 @@ +From 2a6a870e44dd88f1a6a2893c65ef756a9edfb4c7 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 17 Apr 2023 16:00:40 +0200 +Subject: mptcp: stops worker on unaccepted sockets at listener close + +From: Paolo Abeni + +commit 2a6a870e44dd88f1a6a2893c65ef756a9edfb4c7 upstream. + +This is a partial revert of the blamed commit, with a relevant +change: mptcp_subflow_queue_clean() now just change the msk +socket status and stop the worker, so that the UaF issue addressed +by the blamed commit is not re-introduced. 
+ +The above prevents the mptcp worker from running concurrently with +inet_csk_listen_stop(), as such race would trigger a warning, as +reported by Christoph: + +RSP: 002b:00007f784fe09cd8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +WARNING: CPU: 0 PID: 25807 at net/ipv4/inet_connection_sock.c:1387 inet_csk_listen_stop+0x664/0x870 net/ipv4/inet_connection_sock.c:1387 +RAX: ffffffffffffffda RBX: 00000000006bc050 RCX: 00007f7850afd6a9 +RDX: 0000000000000000 RSI: 0000000020000340 RDI: 0000000000000004 +Modules linked in: +RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c +R13: fffffffffffffea8 R14: 00000000006bc050 R15: 000000000001fe40 + + +CPU: 0 PID: 25807 Comm: syz-executor.7 Not tainted 6.2.0-g778e54711659 #7 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 +RIP: 0010:inet_csk_listen_stop+0x664/0x870 net/ipv4/inet_connection_sock.c:1387 +RAX: 0000000000000000 RBX: ffff888100dfbd40 RCX: 0000000000000000 +RDX: ffff8881363aab80 RSI: ffffffff81c494f4 RDI: 0000000000000005 +RBP: ffff888126dad080 R08: 0000000000000005 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: ffff888100dfe040 +R13: 0000000000000001 R14: 0000000000000000 R15: ffff888100dfbdd8 +FS: 00007f7850a2c800(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b32d26000 CR3: 000000012fdd8006 CR4: 0000000000770ef0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +PKRU: 55555554 +Call Trace: + + __tcp_close+0x5b2/0x620 net/ipv4/tcp.c:2875 + __mptcp_close_ssk+0x145/0x3d0 net/mptcp/protocol.c:2427 + mptcp_destroy_common+0x8a/0x1c0 net/mptcp/protocol.c:3277 + mptcp_destroy+0x41/0x60 net/mptcp/protocol.c:3304 + __mptcp_destroy_sock+0x56/0x140 net/mptcp/protocol.c:2965 + __mptcp_close+0x38f/0x4a0 net/mptcp/protocol.c:3057 + mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072 + inet_release+0x53/0xa0 net/ipv4/af_inet.c:429 + __sock_release+0x4e/0xf0 net/socket.c:651 + sock_close+0x15/0x20 net/socket.c:1393 + __fput+0xff/0x420 fs/file_table.c:321 + task_work_run+0x8b/0xe0 kernel/task_work.c:179 + resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] + exit_to_user_mode_loop kernel/entry/common.c:171 [inline] + exit_to_user_mode_prepare+0x113/0x120 kernel/entry/common.c:203 + __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] + syscall_exit_to_user_mode+0x1d/0x40 kernel/entry/common.c:296 + do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86 + entry_SYSCALL_64_after_hwframe+0x72/0xdc +RIP: 0033:0x7f7850af70dc +RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7850af70dc +RDX: 00007f7850a2c800 RSI: 0000000000000002 RDI: 0000000000000003 +RBP: 00000000006bd980 R08: 0000000000000000 R09: 00000000000018a0 +R10: 00000000316338a4 R11: 0000000000000293 R12: 0000000000211e31 +R13: 00000000006bc05c R14: 00007f785062c000 R15: 0000000000211af0 + +Fixes: 0a3f4f1f9c27 ("mptcp: fix UaF in listener shutdown") +Cc: stable@vger.kernel.org +Reported-by: Christoph Paasch +Link: https://github.com/multipath-tcp/mptcp_net-next/issues/371 +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Signed-off-by: Matthieu Baerts +Signed-off-by: David S. 
Miller +Signed-off-by: Matthieu Baerts +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 6 ++++ + net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 79 insertions(+) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2380,6 +2380,12 @@ static void __mptcp_close_ssk(struct soc + mptcp_subflow_drop_ctx(ssk); + } else { + /* otherwise tcp will dispose of the ssk and subflow ctx */ ++ if (ssk->sk_state == TCP_LISTEN) { ++ tcp_set_state(ssk, TCP_CLOSE); ++ mptcp_subflow_queue_clean(sk, ssk); ++ inet_csk_listen_stop(ssk); ++ } ++ + __tcp_close(ssk, 0); + + /* close acquired an extra ref */ +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -615,6 +615,7 @@ void mptcp_close_ssk(struct sock *sk, st + struct mptcp_subflow_context *subflow); + void __mptcp_subflow_send_ack(struct sock *ssk); + void mptcp_subflow_reset(struct sock *ssk); ++void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); + void mptcp_sock_graft(struct sock *sk, struct socket *parent); + struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); + bool __mptcp_close(struct sock *sk, long timeout); +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1758,6 +1758,78 @@ static void subflow_state_change(struct + } + } + ++void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) ++{ ++ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; ++ struct mptcp_sock *msk, *next, *head = NULL; ++ struct request_sock *req; ++ ++ /* build a list of all unaccepted mptcp sockets */ ++ spin_lock_bh(&queue->rskq_lock); ++ for (req = queue->rskq_accept_head; req; req = req->dl_next) { ++ struct mptcp_subflow_context *subflow; ++ struct sock *ssk = req->sk; ++ ++ if (!sk_is_mptcp(ssk)) ++ continue; ++ ++ subflow = mptcp_subflow_ctx(ssk); ++ if (!subflow || !subflow->conn) ++ continue; ++ ++ /* skip if already in list */ ++ msk = mptcp_sk(subflow->conn); ++ if (msk->dl_next || msk == head) ++ continue; ++ ++ sock_hold(subflow->conn); ++ msk->dl_next = head; ++ head = msk; ++ } ++ spin_unlock_bh(&queue->rskq_lock); ++ if (!head) ++ return; ++ ++ /* can't acquire the msk socket lock under the subflow one, ++ * or will cause ABBA deadlock ++ */ ++ release_sock(listener_ssk); ++ ++ for (msk = head; msk; msk = next) { ++ struct sock *sk = (struct sock *)msk; ++ ++ lock_sock_nested(sk, SINGLE_DEPTH_NESTING); ++ next = msk->dl_next; ++ msk->dl_next = NULL; ++ ++ /* prevent the stack from later re-schedule the worker for ++ * this socket ++ */ ++ inet_sk_state_store(sk, TCP_CLOSE); ++ release_sock(sk); ++ ++ /* lockdep will report a false positive ABBA deadlock ++ * between cancel_work_sync and the listener socket. ++ * The involved locks belong to different sockets WRT ++ * the existing AB chain. ++ * Using a per socket key is problematic as key ++ * deregistration requires process context and must be ++ * performed at socket disposal time, in atomic ++ * context. ++ * Just tell lockdep to consider the listener socket ++ * released here. 
++ */ ++ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); ++ mptcp_cancel_work(sk); ++ mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_); ++ ++ sock_put(sk); ++ } ++ ++ /* we are still under the listener msk socket lock */ ++ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); ++} ++ + static int subflow_ulp_init(struct sock *sk) + { + struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/queue-6.1/series b/queue-6.1/series index 4cf976df5de..61da2730a0f 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -1,3 +1,6 @@ um-only-disable-sse-on-clang-to-work-around-old-gcc-bugs.patch phy-phy-brcm-usb-utilize-platform_get_irq_byname_optional.patch kvm-arm64-retry-fault-if-vma_lookup-results-become-invalid.patch +mm-mempolicy-fix-use-after-free-of-vma-iterator.patch +mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch +mptcp-fix-accept-vs-worker-race.patch
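
For context only, below are two hypothetical user-space sketches of the syscall
patterns the queued patches address. They are illustrations, not the
syzkaller/syzbot reproducers; the constants redefined under #ifndef are the
upstream uapi values, and a 6.1 kernel with NUMA and MPTCP support is assumed.

/*
 * Hypothetical sketch (not the syzkaller reproducer) of the pattern the
 * mempolicy patch fixes: set_mempolicy_home_node() on a sub-range forces
 * split_vma() inside the VMA-iteration loop -- the path that could leave
 * the iterator pointing at a freed maple-tree node.
 * Assumes a NUMA-capable kernel where node 0 exists.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef MPOL_BIND
#define MPOL_BIND 2			/* include/uapi/linux/mempolicy.h */
#endif
#ifndef __NR_set_mempolicy_home_node
#define __NR_set_mempolicy_home_node 450
#endif

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	unsigned long nodemask = 1;	/* bind to node 0 */
	char *map = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (map == MAP_FAILED)
		return 1;

	/* MPOL_BIND the whole mapping so a home node is accepted */
	if (syscall(SYS_mbind, map, 4 * page, (unsigned long)MPOL_BIND,
		    &nodemask, 8UL, 0UL))
		perror("mbind");

	/* the middle sub-range makes mbind_range() split the VMA while
	 * iterating -- exactly where mas_pause() is now required */
	if (syscall(__NR_set_mempolicy_home_node,
		    (unsigned long)(map + page), 2UL * page, 0UL, 0UL))
		perror("set_mempolicy_home_node");

	munmap(map, 4 * page);
	return 0;
}

/*
 * Hypothetical sketch (again, not the syzbot reproducer) of the mptcp
 * scenario: a listener is closed while a fully established connection is
 * still sitting, unaccepted, in its queue.  This drives
 * inet_csk_listen_stop() / mptcp_subflow_queue_clean() on an unaccepted
 * msk -- the paths the two mptcp patches serialize against the worker.
 * Requires CONFIG_MPTCP and net.mptcp.enabled=1.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262		/* include/uapi/linux/in.h */
#endif

int main(void)
{
	struct sockaddr_in addr;
	socklen_t alen = sizeof(addr);
	int lsk, csk;

	lsk = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (lsk < 0) {
		perror("socket(IPPROTO_MPTCP)");
		return 1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	addr.sin_port = 0;		/* ephemeral port */
	if (bind(lsk, (struct sockaddr *)&addr, sizeof(addr)) ||
	    listen(lsk, 8) ||
	    getsockname(lsk, (struct sockaddr *)&addr, &alen)) {
		perror("bind/listen/getsockname");
		return 1;
	}

	/* the handshake completes in-kernel; the child msk stays in the
	 * accept queue because accept() is never called */
	csk = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (csk < 0 ||
	    connect(csk, (struct sockaddr *)&addr, sizeof(addr))) {
		perror("connect");
		return 1;
	}

	/* close the listener first: mptcp_subflow_queue_clean() must now
	 * stop the worker on the unaccepted msk without racing it */
	close(lsk);
	close(csk);
	return 0;
}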