git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Apr 2023 08:43:31 +0000 (10:43 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Apr 2023 08:43:31 +0000 (10:43 +0200)
added patches:
mm-mempolicy-fix-use-after-free-of-vma-iterator.patch
mptcp-fix-accept-vs-worker-race.patch
mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch

queue-6.1/mm-mempolicy-fix-use-after-free-of-vma-iterator.patch [new file with mode: 0644]
queue-6.1/mptcp-fix-accept-vs-worker-race.patch [new file with mode: 0644]
queue-6.1/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch [new file with mode: 0644]
queue-6.1/series

diff --git a/queue-6.1/mm-mempolicy-fix-use-after-free-of-vma-iterator.patch b/queue-6.1/mm-mempolicy-fix-use-after-free-of-vma-iterator.patch
new file mode 100644
index 0000000..d2915b8
--- /dev/null
@@ -0,0 +1,211 @@
+From f4e9e0e69468583c2c6d9d5c7bfc975e292bf188 Mon Sep 17 00:00:00 2001
+From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
+Date: Mon, 10 Apr 2023 11:22:05 -0400
+Subject: mm/mempolicy: fix use-after-free of VMA iterator
+
+From: Liam R. Howlett <Liam.Howlett@oracle.com>
+
+commit f4e9e0e69468583c2c6d9d5c7bfc975e292bf188 upstream.
+
+set_mempolicy_home_node() iterates over a list of VMAs and calls
+mbind_range() on each VMA, which also iterates over the singular list of
+the VMA passed in and potentially splits the VMA.  Since the VMA iterator
+is not passed through, set_mempolicy_home_node() may now point to a stale
+node in the VMA tree.  This can result in a UAF as reported by syzbot.
+
+Avoid the stale maple tree node by passing the VMA iterator through to the
+underlying call to split_vma().
+
+mbind_range() is also overly complicated, since there are two calling
+functions and one already handles iterating over the VMAs.  Simplify
+mbind_range() to only handle merging and splitting of the VMAs.
+
+Align the new loop in do_mbind() and the existing loop in
+set_mempolicy_home_node() to use the reduced mbind_range() function.  This
+allows for a single location of the range calculation and avoids
+constantly looking up the previous VMA (since this is a loop over the
+VMAs).
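+
+A condensed sketch of the resulting calling pattern (the wrapper name
+apply_policy_range is illustrative only; the real code inlines this
+loop in do_mbind() and set_mempolicy_home_node(), and locking plus
+error paths are trimmed here):
+
+	static int apply_policy_range(struct mm_struct *mm, unsigned long start,
+				      unsigned long end, struct mempolicy *new)
+	{
+		struct vm_area_struct *vma, *prev;
+		struct vma_iterator vmi;
+		int err = 0;
+
+		vma_iter_init(&vmi, mm, start);
+		prev = vma_prev(&vmi);	/* may be NULL at the start of mm */
+		for_each_vma_range(vmi, vma, end) {
+			/* mbind_range() merges/splits a single VMA and calls
+			 * mas_pause(&vmi->mas) after any tree modification, so
+			 * the loop always resumes from a valid maple node.
+			 */
+			err = mbind_range(&vmi, vma, &prev, start, end, new);
+			if (err)
+				break;
+		}
+		return err;
+	}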
+
+Link: https://lore.kernel.org/linux-mm/000000000000c93feb05f87e24ad@google.com/
+Fixes: 66850be55e8e ("mm/mempolicy: use vma iterator & maple state instead of vma linked list")
+Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Reported-by: syzbot+a7c1ec5b1d71ceaa5186@syzkaller.appspotmail.com
+Link: https://lkml.kernel.org/r/20230410152205.2294819-1-Liam.Howlett@oracle.com
+Tested-by: syzbot+a7c1ec5b1d71ceaa5186@syzkaller.appspotmail.com
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mempolicy.c |  113 ++++++++++++++++++++++++++-------------------------------
+ 1 file changed, 52 insertions(+), 61 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -784,70 +784,56 @@ static int vma_replace_policy(struct vm_
+       return err;
+ }
+-/* Step 2: apply policy to a range and do splits. */
+-static int mbind_range(struct mm_struct *mm, unsigned long start,
+-                     unsigned long end, struct mempolicy *new_pol)
++/* Split or merge the VMA (if required) and apply the new policy */
++static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
++              struct vm_area_struct **prev, unsigned long start,
++              unsigned long end, struct mempolicy *new_pol)
+ {
+-      MA_STATE(mas, &mm->mm_mt, start, start);
+-      struct vm_area_struct *prev;
+-      struct vm_area_struct *vma;
+-      int err = 0;
++      struct vm_area_struct *merged;
++      unsigned long vmstart, vmend;
+       pgoff_t pgoff;
++      int err;
+-      prev = mas_prev(&mas, 0);
+-      if (unlikely(!prev))
+-              mas_set(&mas, start);
++      vmend = min(end, vma->vm_end);
++      if (start > vma->vm_start) {
++              *prev = vma;
++              vmstart = start;
++      } else {
++              vmstart = vma->vm_start;
++      }
+-      vma = mas_find(&mas, end - 1);
+-      if (WARN_ON(!vma))
++      if (mpol_equal(vma_policy(vma), new_pol))
+               return 0;
+-      if (start > vma->vm_start)
+-              prev = vma;
++      pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
++      merged = vma_merge(vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags,
++                         vma->anon_vma, vma->vm_file, pgoff, new_pol,
++                         vma->vm_userfaultfd_ctx, anon_vma_name(vma));
++      if (merged) {
++              *prev = merged;
++              /* vma_merge() invalidated the mas */
++              mas_pause(&vmi->mas);
++              return vma_replace_policy(merged, new_pol);
++      }
+-      for (; vma; vma = mas_next(&mas, end - 1)) {
+-              unsigned long vmstart = max(start, vma->vm_start);
+-              unsigned long vmend = min(end, vma->vm_end);
+-
+-              if (mpol_equal(vma_policy(vma), new_pol))
+-                      goto next;
+-
+-              pgoff = vma->vm_pgoff +
+-                      ((vmstart - vma->vm_start) >> PAGE_SHIFT);
+-              prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
+-                               vma->anon_vma, vma->vm_file, pgoff,
+-                               new_pol, vma->vm_userfaultfd_ctx,
+-                               anon_vma_name(vma));
+-              if (prev) {
+-                      /* vma_merge() invalidated the mas */
+-                      mas_pause(&mas);
+-                      vma = prev;
+-                      goto replace;
+-              }
+-              if (vma->vm_start != vmstart) {
+-                      err = split_vma(vma->vm_mm, vma, vmstart, 1);
+-                      if (err)
+-                              goto out;
+-                      /* split_vma() invalidated the mas */
+-                      mas_pause(&mas);
+-              }
+-              if (vma->vm_end != vmend) {
+-                      err = split_vma(vma->vm_mm, vma, vmend, 0);
+-                      if (err)
+-                              goto out;
+-                      /* split_vma() invalidated the mas */
+-                      mas_pause(&mas);
+-              }
+-replace:
+-              err = vma_replace_policy(vma, new_pol);
++      if (vma->vm_start != vmstart) {
++              err = split_vma(vma->vm_mm, vma, vmstart, 1);
+               if (err)
+-                      goto out;
+-next:
+-              prev = vma;
++                      return err;
++              /* split_vma() invalidated the mas */
++              mas_pause(&vmi->mas);
+       }
+-out:
+-      return err;
++      if (vma->vm_end != vmend) {
++              err = split_vma(vma->vm_mm, vma, vmend, 0);
++              if (err)
++                      return err;
++              /* split_vma() invalidated the mas */
++              mas_pause(&vmi->mas);
++      }
++
++      *prev = vma;
++      return vma_replace_policy(vma, new_pol);
+ }
+ /* Set the process memory policy */
+@@ -1259,6 +1245,8 @@ static long do_mbind(unsigned long start
+                    nodemask_t *nmask, unsigned long flags)
+ {
+       struct mm_struct *mm = current->mm;
++      struct vm_area_struct *vma, *prev;
++      struct vma_iterator vmi;
+       struct mempolicy *new;
+       unsigned long end;
+       int err;
+@@ -1328,7 +1316,13 @@ static long do_mbind(unsigned long start
+               goto up_out;
+       }
+-      err = mbind_range(mm, start, end, new);
++      vma_iter_init(&vmi, mm, start);
++      prev = vma_prev(&vmi);
++      for_each_vma_range(vmi, vma, end) {
++              err = mbind_range(&vmi, vma, &prev, start, end, new);
++              if (err)
++                      break;
++      }
+       if (!err) {
+               int nr_failed = 0;
+@@ -1489,10 +1483,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node,
+               unsigned long, home_node, unsigned long, flags)
+ {
+       struct mm_struct *mm = current->mm;
+-      struct vm_area_struct *vma;
++      struct vm_area_struct *vma, *prev;
+       struct mempolicy *new;
+-      unsigned long vmstart;
+-      unsigned long vmend;
+       unsigned long end;
+       int err = -ENOENT;
+       VMA_ITERATOR(vmi, mm, start);
+@@ -1521,9 +1513,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node,
+       if (end == start)
+               return 0;
+       mmap_write_lock(mm);
++      prev = vma_prev(&vmi);
+       for_each_vma_range(vmi, vma, end) {
+-              vmstart = max(start, vma->vm_start);
+-              vmend   = min(end, vma->vm_end);
+               new = mpol_dup(vma_policy(vma));
+               if (IS_ERR(new)) {
+                       err = PTR_ERR(new);
+@@ -1547,7 +1538,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node,
+               }
+               new->home_node = home_node;
+-              err = mbind_range(mm, vmstart, vmend, new);
++              err = mbind_range(&vmi, vma, &prev, start, end, new);
+               mpol_put(new);
+               if (err)
+                       break;
diff --git a/queue-6.1/mptcp-fix-accept-vs-worker-race.patch b/queue-6.1/mptcp-fix-accept-vs-worker-race.patch
new file mode 100644
index 0000000..b91d499
--- /dev/null
@@ -0,0 +1,283 @@
+From 63740448a32eb662e05894425b47bcc5814136f4 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Mon, 17 Apr 2023 16:00:41 +0200
+Subject: mptcp: fix accept vs worker race
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 63740448a32eb662e05894425b47bcc5814136f4 upstream.
+
+The mptcp worker and mptcp_accept() can race, as reported by Christoph:
+
+refcount_t: addition on 0; use-after-free.
+WARNING: CPU: 1 PID: 14351 at lib/refcount.c:25 refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25
+Modules linked in:
+CPU: 1 PID: 14351 Comm: syz-executor.2 Not tainted 6.3.0-rc1-gde5e8fd0123c #11
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+RIP: 0010:refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25
+Code: 02 31 ff 89 de e8 1b f0 a7 ff 84 db 0f 85 6e ff ff ff e8 3e f5 a7 ff 48 c7 c7 d8 c7 34 83 c6 05 6d 2d 0f 02 01 e8 cb 3d 90 ff <0f> 0b e9 4f ff ff ff e8 1f f5 a7 ff 0f b6 1d 54 2d 0f 02 31 ff 89
+RSP: 0018:ffffc90000a47bf8 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: ffff88802eae98c0 RSI: ffffffff81097d4f RDI: 0000000000000001
+RBP: ffff88802e712180 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000001 R11: ffff88802eaea148 R12: ffff88802e712100
+R13: ffff88802e712a88 R14: ffff888005cb93a8 R15: ffff88802e712a88
+FS:  0000000000000000(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f277fd89120 CR3: 0000000035486002 CR4: 0000000000370ee0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ __refcount_add include/linux/refcount.h:199 [inline]
+ __refcount_inc include/linux/refcount.h:250 [inline]
+ refcount_inc include/linux/refcount.h:267 [inline]
+ sock_hold include/net/sock.h:775 [inline]
+ __mptcp_close+0x4c6/0x4d0 net/mptcp/protocol.c:3051
+ mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072
+ inet_release+0x56/0xa0 net/ipv4/af_inet.c:429
+ __sock_release+0x51/0xf0 net/socket.c:653
+ sock_close+0x18/0x20 net/socket.c:1395
+ __fput+0x113/0x430 fs/file_table.c:321
+ task_work_run+0x96/0x100 kernel/task_work.c:179
+ exit_task_work include/linux/task_work.h:38 [inline]
+ do_exit+0x4fc/0x10c0 kernel/exit.c:869
+ do_group_exit+0x51/0xf0 kernel/exit.c:1019
+ get_signal+0x12b0/0x1390 kernel/signal.c:2859
+ arch_do_signal_or_restart+0x25/0x260 arch/x86/kernel/signal.c:306
+ exit_to_user_mode_loop kernel/entry/common.c:168 [inline]
+ exit_to_user_mode_prepare+0x131/0x1a0 kernel/entry/common.c:203
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
+ syscall_exit_to_user_mode+0x19/0x40 kernel/entry/common.c:296
+ do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7fec4b4926a9
+Code: Unable to access opcode bytes at 0x7fec4b49267f.
+RSP: 002b:00007fec49f9dd78 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
+RAX: fffffffffffffe00 RBX: 00000000006bc058 RCX: 00007fec4b4926a9
+RDX: 0000000000000000 RSI: 0000000000000080 RDI: 00000000006bc058
+RBP: 00000000006bc050 R08: 00000000007df998 R09: 00000000007df998
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c
+R13: fffffffffffffea8 R14: 000000000000000b R15: 000000000001fe40
+ </TASK>
+
+The root cause is that the worker can force the first mptcp subflow to
+fall back to TCP, actually deleting the unaccepted msk socket.
+
+We can explicitly prevent the race by delaying the unaccepted msk
+deletion to listener shutdown time. In case the closed subflow is later
+accepted, just drop the mptcp context and let user-space deal with the
+paired mptcp socket.
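+
+A condensed sketch of the new early branch in __mptcp_close_ssk() that
+implements this deferral (names as in the hunk below; the surrounding
+function is trimmed, so this is illustrative rather than complete):
+
+	if (msk->in_accept_queue && msk->first == ssk) {
+		/* neither side is dead yet: keep the msk alive so that
+		 * accept() can still return it; cleanup happens later at
+		 * accept() time or at listener shutdown
+		 */
+		if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
+			return;
+
+		/* ensure the later check in mptcp_worker() disposes the msk */
+		sock_set_flag(sk, SOCK_DEAD);
+		lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+		mptcp_subflow_drop_ctx(ssk);
+		goto out_release;
+	}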
+
+Fixes: b6985b9b8295 ("mptcp: use the workqueue to destroy unaccepted sockets")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/375
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Tested-by: Christoph Paasch <cpaasch@apple.com>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |   68 +++++++++++++++++++++++++++++++++------------------
+ net/mptcp/protocol.h |    1 
+ net/mptcp/subflow.c  |   22 +++++++++-------
+ 3 files changed, 58 insertions(+), 33 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2330,7 +2330,26 @@ static void __mptcp_close_ssk(struct soc
+                             unsigned int flags)
+ {
+       struct mptcp_sock *msk = mptcp_sk(sk);
+-      bool need_push, dispose_it;
++      bool dispose_it, need_push = false;
++
++      /* If the first subflow moved to a close state before accept, e.g. due
++       * to an incoming reset, mptcp either:
++       * - if either the subflow or the msk are dead, destroy the context
++       *   (the subflow socket is deleted by inet_child_forget) and the msk
++       * - otherwise do nothing at the moment and take action at accept and/or
++       *   listener shutdown - user-space must be able to accept() the closed
++       *   socket.
++       */
++      if (msk->in_accept_queue && msk->first == ssk) {
++              if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
++                      return;
++
++              /* ensure later check in mptcp_worker() will dispose the msk */
++              sock_set_flag(sk, SOCK_DEAD);
++              lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
++              mptcp_subflow_drop_ctx(ssk);
++              goto out_release;
++      }
+       dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+       if (dispose_it)
+@@ -2366,18 +2385,6 @@ static void __mptcp_close_ssk(struct soc
+       if (!inet_csk(ssk)->icsk_ulp_ops) {
+               WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
+               kfree_rcu(subflow, rcu);
+-      } else if (msk->in_accept_queue && msk->first == ssk) {
+-              /* if the first subflow moved to a close state, e.g. due to
+-               * incoming reset and we reach here before inet_child_forget()
+-               * the TCP stack could later try to close it via
+-               * inet_csk_listen_stop(), or deliver it to the user space via
+-               * accept().
+-               * We can't delete the subflow - or risk a double free - nor let
+-               * the msk survive - or will be leaked in the non accept scenario:
+-               * fallback and let TCP cope with the subflow cleanup.
+-               */
+-              WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
+-              mptcp_subflow_drop_ctx(ssk);
+       } else {
+               /* otherwise tcp will dispose of the ssk and subflow ctx */
+               if (ssk->sk_state == TCP_LISTEN) {
+@@ -2391,6 +2398,8 @@ static void __mptcp_close_ssk(struct soc
+               /* close acquired an extra ref */
+               __sock_put(ssk);
+       }
++
++out_release:
+       release_sock(ssk);
+       sock_put(ssk);
+@@ -2445,21 +2454,14 @@ static void __mptcp_close_subflow(struct
+               mptcp_close_ssk(sk, ssk, subflow);
+       }
+-      /* if the MPC subflow has been closed before the msk is accepted,
+-       * msk will never be accept-ed, close it now
+-       */
+-      if (!msk->first && msk->in_accept_queue) {
+-              sock_set_flag(sk, SOCK_DEAD);
+-              inet_sk_state_store(sk, TCP_CLOSE);
+-      }
+ }
+-static bool mptcp_check_close_timeout(const struct sock *sk)
++static bool mptcp_should_close(const struct sock *sk)
+ {
+       s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
+       struct mptcp_subflow_context *subflow;
+-      if (delta >= TCP_TIMEWAIT_LEN)
++      if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
+               return true;
+       /* if all subflows are in closed status don't bother with additional
+@@ -2667,7 +2669,7 @@ static void mptcp_worker(struct work_str
+        * even if it is orphaned and in FIN_WAIT2 state
+        */
+       if (sock_flag(sk, SOCK_DEAD)) {
+-              if (mptcp_check_close_timeout(sk)) {
++              if (mptcp_should_close(sk)) {
+                       inet_sk_state_store(sk, TCP_CLOSE);
+                       mptcp_do_fastclose(sk);
+               }
+@@ -2912,6 +2914,14 @@ static void __mptcp_destroy_sock(struct
+       sock_put(sk);
+ }
++void __mptcp_unaccepted_force_close(struct sock *sk)
++{
++      sock_set_flag(sk, SOCK_DEAD);
++      inet_sk_state_store(sk, TCP_CLOSE);
++      mptcp_do_fastclose(sk);
++      __mptcp_destroy_sock(sk);
++}
++
+ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+ {
+       /* Concurrent splices from sk_receive_queue into receive_queue will
+@@ -3759,6 +3769,18 @@ static int mptcp_stream_accept(struct so
+                       if (!ssk->sk_socket)
+                               mptcp_sock_graft(ssk, newsock);
+               }
++
++              /* Do late cleanup for the first subflow as necessary. Also
++               * deal with bad peers not doing a complete shutdown.
++               */
++              if (msk->first &&
++                  unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
++                      __mptcp_close_ssk(newsk, msk->first,
++                                        mptcp_subflow_ctx(msk->first), 0);
++                      if (unlikely(list_empty(&msk->conn_list)))
++                              inet_sk_state_store(newsk, TCP_CLOSE);
++              }
++
+               release_sock(newsk);
+       }
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -620,6 +620,7 @@ void mptcp_sock_graft(struct sock *sk, s
+ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+ bool __mptcp_close(struct sock *sk, long timeout);
+ void mptcp_cancel_work(struct sock *sk);
++void __mptcp_unaccepted_force_close(struct sock *sk);
+ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+                          const struct mptcp_addr_info *b, bool use_port);
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -661,9 +661,12 @@ void mptcp_subflow_drop_ctx(struct sock
+       if (!ctx)
+               return;
+-      subflow_ulp_fallback(ssk, ctx);
+-      if (ctx->conn)
+-              sock_put(ctx->conn);
++      list_del(&mptcp_subflow_ctx(ssk)->node);
++      if (inet_csk(ssk)->icsk_ulp_ops) {
++              subflow_ulp_fallback(ssk, ctx);
++              if (ctx->conn)
++                      sock_put(ctx->conn);
++      }
+       kfree_rcu(ctx, rcu);
+ }
+@@ -1763,6 +1766,7 @@ void mptcp_subflow_queue_clean(struct so
+       struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+       struct mptcp_sock *msk, *next, *head = NULL;
+       struct request_sock *req;
++      struct sock *sk;
+       /* build a list of all unaccepted mptcp sockets */
+       spin_lock_bh(&queue->rskq_lock);
+@@ -1778,11 +1782,12 @@ void mptcp_subflow_queue_clean(struct so
+                       continue;
+               /* skip if already in list */
+-              msk = mptcp_sk(subflow->conn);
++              sk = subflow->conn;
++              msk = mptcp_sk(sk);
+               if (msk->dl_next || msk == head)
+                       continue;
+-              sock_hold(subflow->conn);
++              sock_hold(sk);
+               msk->dl_next = head;
+               head = msk;
+       }
+@@ -1796,16 +1801,13 @@ void mptcp_subflow_queue_clean(struct so
+       release_sock(listener_ssk);
+       for (msk = head; msk; msk = next) {
+-              struct sock *sk = (struct sock *)msk;
++              sk = (struct sock *)msk;
+               lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+               next = msk->dl_next;
+               msk->dl_next = NULL;
+-              /* prevent the stack from later re-schedule the worker for
+-               * this socket
+-               */
+-              inet_sk_state_store(sk, TCP_CLOSE);
++              __mptcp_unaccepted_force_close(sk);
+               release_sock(sk);
+               /* lockdep will report a false positive ABBA deadlock
diff --git a/queue-6.1/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch b/queue-6.1/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch
new file mode 100644
index 0000000..cd124fb
--- /dev/null
@@ -0,0 +1,192 @@
+From 2a6a870e44dd88f1a6a2893c65ef756a9edfb4c7 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Mon, 17 Apr 2023 16:00:40 +0200
+Subject: mptcp: stops worker on unaccepted sockets at listener close
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 2a6a870e44dd88f1a6a2893c65ef756a9edfb4c7 upstream.
+
+This is a partial revert of the blamed commit, with one relevant
+change: mptcp_subflow_queue_clean() now just changes the msk
+socket status and stops the worker, so that the UaF issue addressed
+by the blamed commit is not re-introduced.
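+
+A condensed sketch of the per-msk loop this leaves behind (taken from
+the hunk below; the list building and the lockdep annotations around
+mptcp_cancel_work() are elided here):
+
+	for (msk = head; msk; msk = next) {
+		struct sock *sk = (struct sock *)msk;
+
+		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+		next = msk->dl_next;
+		msk->dl_next = NULL;
+
+		/* status change plus worker stop only: nothing is freed
+		 * here, so the UaF fixed by the blamed commit stays fixed
+		 */
+		inet_sk_state_store(sk, TCP_CLOSE);
+		release_sock(sk);
+		mptcp_cancel_work(sk);
+
+		sock_put(sk);
+	}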
+
+The above prevents the mptcp worker from running concurrently with
+inet_csk_listen_stop(); such a race would trigger a warning, as
+reported by Christoph:
+
+RSP: 002b:00007f784fe09cd8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+WARNING: CPU: 0 PID: 25807 at net/ipv4/inet_connection_sock.c:1387 inet_csk_listen_stop+0x664/0x870 net/ipv4/inet_connection_sock.c:1387
+RAX: ffffffffffffffda RBX: 00000000006bc050 RCX: 00007f7850afd6a9
+RDX: 0000000000000000 RSI: 0000000020000340 RDI: 0000000000000004
+Modules linked in:
+RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c
+R13: fffffffffffffea8 R14: 00000000006bc050 R15: 000000000001fe40
+
+ </TASK>
+CPU: 0 PID: 25807 Comm: syz-executor.7 Not tainted 6.2.0-g778e54711659 #7
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+RIP: 0010:inet_csk_listen_stop+0x664/0x870 net/ipv4/inet_connection_sock.c:1387
+RAX: 0000000000000000 RBX: ffff888100dfbd40 RCX: 0000000000000000
+RDX: ffff8881363aab80 RSI: ffffffff81c494f4 RDI: 0000000000000005
+RBP: ffff888126dad080 R08: 0000000000000005 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000000 R12: ffff888100dfe040
+R13: 0000000000000001 R14: 0000000000000000 R15: ffff888100dfbdd8
+FS:  00007f7850a2c800(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000001b32d26000 CR3: 000000012fdd8006 CR4: 0000000000770ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ __tcp_close+0x5b2/0x620 net/ipv4/tcp.c:2875
+ __mptcp_close_ssk+0x145/0x3d0 net/mptcp/protocol.c:2427
+ mptcp_destroy_common+0x8a/0x1c0 net/mptcp/protocol.c:3277
+ mptcp_destroy+0x41/0x60 net/mptcp/protocol.c:3304
+ __mptcp_destroy_sock+0x56/0x140 net/mptcp/protocol.c:2965
+ __mptcp_close+0x38f/0x4a0 net/mptcp/protocol.c:3057
+ mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072
+ inet_release+0x53/0xa0 net/ipv4/af_inet.c:429
+ __sock_release+0x4e/0xf0 net/socket.c:651
+ sock_close+0x15/0x20 net/socket.c:1393
+ __fput+0xff/0x420 fs/file_table.c:321
+ task_work_run+0x8b/0xe0 kernel/task_work.c:179
+ resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
+ exit_to_user_mode_prepare+0x113/0x120 kernel/entry/common.c:203
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
+ syscall_exit_to_user_mode+0x1d/0x40 kernel/entry/common.c:296
+ do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7f7850af70dc
+RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7850af70dc
+RDX: 00007f7850a2c800 RSI: 0000000000000002 RDI: 0000000000000003
+RBP: 00000000006bd980 R08: 0000000000000000 R09: 00000000000018a0
+R10: 00000000316338a4 R11: 0000000000000293 R12: 0000000000211e31
+R13: 00000000006bc05c R14: 00007f785062c000 R15: 0000000000211af0
+
+Fixes: 0a3f4f1f9c27 ("mptcp: fix UaF in listener shutdown")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/371
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |    6 ++++
+ net/mptcp/protocol.h |    1 
+ net/mptcp/subflow.c  |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 79 insertions(+)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2380,6 +2380,12 @@ static void __mptcp_close_ssk(struct soc
+               mptcp_subflow_drop_ctx(ssk);
+       } else {
+               /* otherwise tcp will dispose of the ssk and subflow ctx */
++              if (ssk->sk_state == TCP_LISTEN) {
++                      tcp_set_state(ssk, TCP_CLOSE);
++                      mptcp_subflow_queue_clean(sk, ssk);
++                      inet_csk_listen_stop(ssk);
++              }
++
+               __tcp_close(ssk, 0);
+               /* close acquired an extra ref */
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -615,6 +615,7 @@ void mptcp_close_ssk(struct sock *sk, st
+                    struct mptcp_subflow_context *subflow);
+ void __mptcp_subflow_send_ack(struct sock *ssk);
+ void mptcp_subflow_reset(struct sock *ssk);
++void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
+ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
+ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+ bool __mptcp_close(struct sock *sk, long timeout);
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1758,6 +1758,78 @@ static void subflow_state_change(struct
+       }
+ }
++void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
++{
++      struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
++      struct mptcp_sock *msk, *next, *head = NULL;
++      struct request_sock *req;
++
++      /* build a list of all unaccepted mptcp sockets */
++      spin_lock_bh(&queue->rskq_lock);
++      for (req = queue->rskq_accept_head; req; req = req->dl_next) {
++              struct mptcp_subflow_context *subflow;
++              struct sock *ssk = req->sk;
++
++              if (!sk_is_mptcp(ssk))
++                      continue;
++
++              subflow = mptcp_subflow_ctx(ssk);
++              if (!subflow || !subflow->conn)
++                      continue;
++
++              /* skip if already in list */
++              msk = mptcp_sk(subflow->conn);
++              if (msk->dl_next || msk == head)
++                      continue;
++
++              sock_hold(subflow->conn);
++              msk->dl_next = head;
++              head = msk;
++      }
++      spin_unlock_bh(&queue->rskq_lock);
++      if (!head)
++              return;
++
++      /* can't acquire the msk socket lock under the subflow one,
++       * or will cause ABBA deadlock
++       */
++      release_sock(listener_ssk);
++
++      for (msk = head; msk; msk = next) {
++              struct sock *sk = (struct sock *)msk;
++
++              lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
++              next = msk->dl_next;
++              msk->dl_next = NULL;
++
++              /* prevent the stack from later re-schedule the worker for
++               * this socket
++               */
++              inet_sk_state_store(sk, TCP_CLOSE);
++              release_sock(sk);
++
++              /* lockdep will report a false positive ABBA deadlock
++               * between cancel_work_sync and the listener socket.
++               * The involved locks belong to different sockets WRT
++               * the existing AB chain.
++               * Using a per socket key is problematic as key
++               * deregistration requires process context and must be
++               * performed at socket disposal time, in atomic
++               * context.
++               * Just tell lockdep to consider the listener socket
++               * released here.
++               */
++              mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
++              mptcp_cancel_work(sk);
++              mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
++
++              sock_put(sk);
++      }
++
++      /* we are still under the listener msk socket lock */
++      lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
++}
++
+ static int subflow_ulp_init(struct sock *sk)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/queue-6.1/series b/queue-6.1/series
index 4cf976df5de4a46c3913cde383927baa2658efb6..61da2730a0f748020f8efaa3a2ddad7e1c4e84c6 100644
@@ -1,3 +1,6 @@
 um-only-disable-sse-on-clang-to-work-around-old-gcc-bugs.patch
 phy-phy-brcm-usb-utilize-platform_get_irq_byname_optional.patch
 kvm-arm64-retry-fault-if-vma_lookup-results-become-invalid.patch
+mm-mempolicy-fix-use-after-free-of-vma-iterator.patch
+mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch
+mptcp-fix-accept-vs-worker-race.patch