git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
drop mptcp patches from 6.12
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 4 Nov 2025 02:48:55 +0000 (11:48 +0900)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 4 Nov 2025 02:48:55 +0000 (11:48 +0900)
queue-6.12/mptcp-cleanup-mem-accounting.patch [deleted file]
queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch [deleted file]
queue-6.12/mptcp-leverage-skb-deferral-free.patch [deleted file]
queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch [deleted file]
queue-6.12/series

diff --git a/queue-6.12/mptcp-cleanup-mem-accounting.patch b/queue-6.12/mptcp-cleanup-mem-accounting.patch
deleted file mode 100644 (file)
index 6e1a8b4..0000000
+++ /dev/null
@@ -1,298 +0,0 @@
-From stable+bounces-192096-greg=kroah.com@vger.kernel.org Mon Nov  3 08:27:44 2025
-From: Sasha Levin <sashal@kernel.org>
-Date: Sun,  2 Nov 2025 18:27:33 -0500
-Subject: mptcp: cleanup mem accounting
-To: stable@vger.kernel.org
-Cc: Paolo Abeni <pabeni@redhat.com>, Mat Martineau <martineau@kernel.org>, "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
-Message-ID: <20251102232735.3652847-2-sashal@kernel.org>
-
-From: Paolo Abeni <pabeni@redhat.com>
-
-[ Upstream commit 6639498ed85fdb135dfb0dfbcc0f540b2d4ad6a6 ]
-
-After the previous patch, updating sk_forward_memory is cheap and
-we can drop a lot of complexity from the MPTCP memory accounting,
-removing the custom fwd mem allocations for rmem.
-
-Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-Reviewed-by: Mat Martineau <martineau@kernel.org>
-Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
-Link: https://patch.msgid.link/20250218-net-next-mptcp-rx-path-refactor-v1-4-4a47d90d7998@kernel.org
-Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/mptcp/fastopen.c |    2 
- net/mptcp/protocol.c |  115 +++------------------------------------------------
- net/mptcp/protocol.h |    4 -
- 3 files changed, 10 insertions(+), 111 deletions(-)
-
---- a/net/mptcp/fastopen.c
-+++ b/net/mptcp/fastopen.c
-@@ -51,7 +51,7 @@ void mptcp_fastopen_subflow_synack_set_p
-       mptcp_data_lock(sk);
-       DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
--      mptcp_set_owner_r(skb, sk);
-+      skb_set_owner_r(skb, sk);
-       __skb_queue_tail(&sk->sk_receive_queue, skb);
-       mptcp_sk(sk)->bytes_received += skb->len;
---- a/net/mptcp/protocol.c
-+++ b/net/mptcp/protocol.c
-@@ -118,17 +118,6 @@ static void mptcp_drop(struct sock *sk,
-       __kfree_skb(skb);
- }
--static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
--{
--      WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
--                 mptcp_sk(sk)->rmem_fwd_alloc + size);
--}
--
--static void mptcp_rmem_charge(struct sock *sk, int size)
--{
--      mptcp_rmem_fwd_alloc_add(sk, -size);
--}
--
- static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
-                              struct sk_buff *from)
- {
-@@ -150,7 +139,7 @@ static bool mptcp_try_coalesce(struct so
-        * negative one
-        */
-       atomic_add(delta, &sk->sk_rmem_alloc);
--      mptcp_rmem_charge(sk, delta);
-+      sk_mem_charge(sk, delta);
-       kfree_skb_partial(from, fragstolen);
-       return true;
-@@ -165,44 +154,6 @@ static bool mptcp_ooo_try_coalesce(struc
-       return mptcp_try_coalesce((struct sock *)msk, to, from);
- }
--static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
--{
--      amount >>= PAGE_SHIFT;
--      mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
--      __sk_mem_reduce_allocated(sk, amount);
--}
--
--static void mptcp_rmem_uncharge(struct sock *sk, int size)
--{
--      struct mptcp_sock *msk = mptcp_sk(sk);
--      int reclaimable;
--
--      mptcp_rmem_fwd_alloc_add(sk, size);
--      reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
--
--      /* see sk_mem_uncharge() for the rationale behind the following schema */
--      if (unlikely(reclaimable >= PAGE_SIZE))
--              __mptcp_rmem_reclaim(sk, reclaimable);
--}
--
--static void mptcp_rfree(struct sk_buff *skb)
--{
--      unsigned int len = skb->truesize;
--      struct sock *sk = skb->sk;
--
--      atomic_sub(len, &sk->sk_rmem_alloc);
--      mptcp_rmem_uncharge(sk, len);
--}
--
--void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
--{
--      skb_orphan(skb);
--      skb->sk = sk;
--      skb->destructor = mptcp_rfree;
--      atomic_add(skb->truesize, &sk->sk_rmem_alloc);
--      mptcp_rmem_charge(sk, skb->truesize);
--}
--
- /* "inspired" by tcp_data_queue_ofo(), main differences:
-  * - use mptcp seqs
-  * - don't cope with sacks
-@@ -315,25 +266,7 @@ merge_right:
- end:
-       skb_condense(skb);
--      mptcp_set_owner_r(skb, sk);
--}
--
--static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
--{
--      struct mptcp_sock *msk = mptcp_sk(sk);
--      int amt, amount;
--
--      if (size <= msk->rmem_fwd_alloc)
--              return true;
--
--      size -= msk->rmem_fwd_alloc;
--      amt = sk_mem_pages(size);
--      amount = amt << PAGE_SHIFT;
--      if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
--              return false;
--
--      mptcp_rmem_fwd_alloc_add(sk, amount);
--      return true;
-+      skb_set_owner_r(skb, sk);
- }
- static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
-@@ -351,7 +284,7 @@ static bool __mptcp_move_skb(struct mptc
-       skb_orphan(skb);
-       /* try to fetch required memory from subflow */
--      if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) {
-+      if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
-               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
-               goto drop;
-       }
-@@ -375,7 +308,7 @@ static bool __mptcp_move_skb(struct mptc
-               if (tail && mptcp_try_coalesce(sk, tail, skb))
-                       return true;
--              mptcp_set_owner_r(skb, sk);
-+              skb_set_owner_r(skb, sk);
-               __skb_queue_tail(&sk->sk_receive_queue, skb);
-               return true;
-       } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
-@@ -2016,9 +1949,10 @@ static int __mptcp_recvmsg_mskq(struct s
-               }
-               if (!(flags & MSG_PEEK)) {
--                      /* we will bulk release the skb memory later */
-+                      /* avoid the indirect call, we know the destructor is sock_wfree */
-                       skb->destructor = NULL;
--                      WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
-+                      atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
-+                      sk_mem_uncharge(sk, skb->truesize);
-                       __skb_unlink(skb, &sk->sk_receive_queue);
-                       __kfree_skb(skb);
-                       msk->bytes_consumed += count;
-@@ -2132,18 +2066,6 @@ new_measure:
-       msk->rcvq_space.time = mstamp;
- }
--static void __mptcp_update_rmem(struct sock *sk)
--{
--      struct mptcp_sock *msk = mptcp_sk(sk);
--
--      if (!msk->rmem_released)
--              return;
--
--      atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
--      mptcp_rmem_uncharge(sk, msk->rmem_released);
--      WRITE_ONCE(msk->rmem_released, 0);
--}
--
- static bool __mptcp_move_skbs(struct sock *sk)
- {
-       struct mptcp_subflow_context *subflow;
-@@ -2167,7 +2089,6 @@ static bool __mptcp_move_skbs(struct soc
-                       break;
-               slowpath = lock_sock_fast(ssk);
--              __mptcp_update_rmem(sk);
-               done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
-               if (unlikely(ssk->sk_err))
-@@ -2175,12 +2096,7 @@ static bool __mptcp_move_skbs(struct soc
-               unlock_sock_fast(ssk, slowpath);
-       } while (!done);
--      ret = moved > 0;
--      if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) ||
--          !skb_queue_empty(&sk->sk_receive_queue)) {
--              __mptcp_update_rmem(sk);
--              ret |= __mptcp_ofo_queue(msk);
--      }
-+      ret = moved > 0 || __mptcp_ofo_queue(msk);
-       if (ret)
-               mptcp_check_data_fin((struct sock *)msk);
-       return ret;
-@@ -2859,8 +2775,6 @@ static void __mptcp_init_sock(struct soc
-       INIT_WORK(&msk->work, mptcp_worker);
-       msk->out_of_order_queue = RB_ROOT;
-       msk->first_pending = NULL;
--      WRITE_ONCE(msk->rmem_fwd_alloc, 0);
--      WRITE_ONCE(msk->rmem_released, 0);
-       msk->timer_ival = TCP_RTO_MIN;
-       msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
-@@ -3088,8 +3002,6 @@ static void __mptcp_destroy_sock(struct
-       sk->sk_prot->destroy(sk);
--      WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
--      WARN_ON_ONCE(msk->rmem_released);
-       sk_stream_kill_queues(sk);
-       xfrm_sk_free_policy(sk);
-@@ -3458,8 +3370,6 @@ void mptcp_destroy_common(struct mptcp_s
-       /* move all the rx fwd alloc into the sk_mem_reclaim_final in
-        * inet_sock_destruct() will dispose it
-        */
--      sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
--      WRITE_ONCE(msk->rmem_fwd_alloc, 0);
-       mptcp_token_destroy(msk);
-       mptcp_pm_free_anno_list(msk);
-       mptcp_free_local_addr_list(msk);
-@@ -3552,8 +3462,6 @@ static void mptcp_release_cb(struct sock
-               if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags))
-                       __mptcp_sync_sndbuf(sk);
-       }
--
--      __mptcp_update_rmem(sk);
- }
- /* MP_JOIN client subflow must wait for 4th ack before sending any data:
-@@ -3729,12 +3637,6 @@ static void mptcp_shutdown(struct sock *
-               __mptcp_wr_shutdown(sk);
- }
--static int mptcp_forward_alloc_get(const struct sock *sk)
--{
--      return READ_ONCE(sk->sk_forward_alloc) +
--             READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
--}
--
- static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
- {
-       const struct sock *sk = (void *)msk;
-@@ -3893,7 +3795,6 @@ static struct proto mptcp_prot = {
-       .hash           = mptcp_hash,
-       .unhash         = mptcp_unhash,
-       .get_port       = mptcp_get_port,
--      .forward_alloc_get      = mptcp_forward_alloc_get,
-       .stream_memory_free     = mptcp_stream_memory_free,
-       .sockets_allocated      = &mptcp_sockets_allocated,
---- a/net/mptcp/protocol.h
-+++ b/net/mptcp/protocol.h
-@@ -280,7 +280,6 @@ struct mptcp_sock {
-       u64             rcv_data_fin_seq;
-       u64             bytes_retrans;
-       u64             bytes_consumed;
--      int             rmem_fwd_alloc;
-       int             snd_burst;
-       int             old_wspace;
-       u64             recovery_snd_nxt;       /* in recovery mode accept up to this seq;
-@@ -295,7 +294,6 @@ struct mptcp_sock {
-       u32             last_ack_recv;
-       unsigned long   timer_ival;
-       u32             token;
--      int             rmem_released;
-       unsigned long   flags;
-       unsigned long   cb_flags;
-       bool            recovery;               /* closing subflow write queue reinjected */
-@@ -392,7 +390,7 @@ static inline void msk_owned_by_me(const
-  */
- static inline int __mptcp_rmem(const struct sock *sk)
- {
--      return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
-+      return atomic_read(&sk->sk_rmem_alloc);
- }
- static inline int mptcp_win_from_space(const struct sock *sk, int space)
diff --git a/queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch b/queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch
deleted file mode 100644 (file)
index 9f247ab..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-From stable+bounces-192098-greg=kroah.com@vger.kernel.org Mon Nov  3 08:27:48 2025
-From: Sasha Levin <sashal@kernel.org>
-Date: Sun,  2 Nov 2025 18:27:35 -0500
-Subject: mptcp: fix MSG_PEEK stream corruption
-To: stable@vger.kernel.org
-Cc: Paolo Abeni <pabeni@redhat.com>, Geliang Tang <geliang@kernel.org>, Mat Martineau <martineau@kernel.org>, "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
-Message-ID: <20251102232735.3652847-4-sashal@kernel.org>
-
-From: Paolo Abeni <pabeni@redhat.com>
-
-[ Upstream commit 8e04ce45a8db7a080220e86e249198fa676b83dc ]
-
-If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the
-receive queue and recvmsg() need to waits for more data - i.e. it's a
-blocking one - upon arrival of the next packet the MPTCP protocol will
-start again copying the oldest data present in the receive queue,
-corrupting the data stream.
-
-Address the issue explicitly tracking the peeked sequence number,
-restarting from the last peeked byte.
-
-Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support")
-Cc: stable@vger.kernel.org
-Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-Reviewed-by: Geliang Tang <geliang@kernel.org>
-Tested-by: Geliang Tang <geliang@kernel.org>
-Reviewed-by: Mat Martineau <martineau@kernel.org>
-Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
-Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-2-38ffff5a9ec8@kernel.org
-Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/mptcp/protocol.c |   38 +++++++++++++++++++++++++-------------
- 1 file changed, 25 insertions(+), 13 deletions(-)
-
---- a/net/mptcp/protocol.c
-+++ b/net/mptcp/protocol.c
-@@ -1907,22 +1907,36 @@ do_error:
- static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
--static int __mptcp_recvmsg_mskq(struct sock *sk,
--                              struct msghdr *msg,
--                              size_t len, int flags,
-+static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
-+                              size_t len, int flags, int copied_total,
-                               struct scm_timestamping_internal *tss,
-                               int *cmsg_flags)
- {
-       struct mptcp_sock *msk = mptcp_sk(sk);
-       struct sk_buff *skb, *tmp;
-+      int total_data_len = 0;
-       int copied = 0;
-       skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
--              u32 offset = MPTCP_SKB_CB(skb)->offset;
-+              u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
-               u32 data_len = skb->len - offset;
--              u32 count = min_t(size_t, len - copied, data_len);
-+              u32 count;
-               int err;
-+              if (flags & MSG_PEEK) {
-+                      /* skip already peeked skbs */
-+                      if (total_data_len + data_len <= copied_total) {
-+                              total_data_len += data_len;
-+                              continue;
-+                      }
-+
-+                      /* skip the already peeked data in the current skb */
-+                      delta = copied_total - total_data_len;
-+                      offset += delta;
-+                      data_len -= delta;
-+              }
-+
-+              count = min_t(size_t, len - copied, data_len);
-               if (!(flags & MSG_TRUNC)) {
-                       err = skb_copy_datagram_msg(skb, offset, msg, count);
-                       if (unlikely(err < 0)) {
-@@ -1939,16 +1953,14 @@ static int __mptcp_recvmsg_mskq(struct s
-               copied += count;
--              if (count < data_len) {
--                      if (!(flags & MSG_PEEK)) {
-+              if (!(flags & MSG_PEEK)) {
-+                      msk->bytes_consumed += count;
-+                      if (count < data_len) {
-                               MPTCP_SKB_CB(skb)->offset += count;
-                               MPTCP_SKB_CB(skb)->map_seq += count;
--                              msk->bytes_consumed += count;
-+                              break;
-                       }
--                      break;
--              }
--              if (!(flags & MSG_PEEK)) {
-                       /* avoid the indirect call, we know the destructor is sock_rfree */
-                       skb->destructor = NULL;
-                       skb->sk = NULL;
-@@ -1956,7 +1968,6 @@ static int __mptcp_recvmsg_mskq(struct s
-                       sk_mem_uncharge(sk, skb->truesize);
-                       __skb_unlink(skb, &sk->sk_receive_queue);
-                       skb_attempt_defer_free(skb);
--                      msk->bytes_consumed += count;
-               }
-               if (copied >= len)
-@@ -2154,7 +2165,8 @@ static int mptcp_recvmsg(struct sock *sk
-       while (copied < len) {
-               int err, bytes_read;
--              bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
-+              bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
-+                                                copied, &tss, &cmsg_flags);
-               if (unlikely(bytes_read < 0)) {
-                       if (!copied)
-                               copied = bytes_read;
diff --git a/queue-6.12/mptcp-leverage-skb-deferral-free.patch b/queue-6.12/mptcp-leverage-skb-deferral-free.patch
deleted file mode 100644 (file)
index a099c64..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-From stable+bounces-192097-greg=kroah.com@vger.kernel.org Mon Nov  3 08:27:49 2025
-From: Sasha Levin <sashal@kernel.org>
-Date: Sun,  2 Nov 2025 18:27:34 -0500
-Subject: mptcp: leverage skb deferral free
-To: stable@vger.kernel.org
-Cc: Paolo Abeni <pabeni@redhat.com>, Geliang Tang <geliang@kernel.org>, "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
-Message-ID: <20251102232735.3652847-3-sashal@kernel.org>
-
-From: Paolo Abeni <pabeni@redhat.com>
-
-[ Upstream commit 9aa59323f2709370cb4f01acbba599a9167f317b ]
-
-Usage of the skb deferral API is straight-forward; with multiple
-subflows actives this allow moving part of the received application
-load into multiple CPUs.
-
-Also fix a typo in the related comment.
-
-Reviewed-by: Geliang Tang <geliang@kernel.org>
-Tested-by: Geliang Tang <geliang@kernel.org>
-Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
-Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
-Link: https://patch.msgid.link/20250927-net-next-mptcp-rcv-path-imp-v1-1-5da266aa9c1a@kernel.org
-Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/mptcp/protocol.c |    5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
---- a/net/mptcp/protocol.c
-+++ b/net/mptcp/protocol.c
-@@ -1949,12 +1949,13 @@ static int __mptcp_recvmsg_mskq(struct s
-               }
-               if (!(flags & MSG_PEEK)) {
--                      /* avoid the indirect call, we know the destructor is sock_wfree */
-+                      /* avoid the indirect call, we know the destructor is sock_rfree */
-                       skb->destructor = NULL;
-+                      skb->sk = NULL;
-                       atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
-                       sk_mem_uncharge(sk, skb->truesize);
-                       __skb_unlink(skb, &sk->sk_receive_queue);
--                      __kfree_skb(skb);
-+                      skb_attempt_defer_free(skb);
-                       msk->bytes_consumed += count;
-               }
diff --git a/queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch b/queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch
deleted file mode 100644 (file)
index d6d5272..0000000
+++ /dev/null
@@ -1,359 +0,0 @@
-From stable+bounces-192095-greg=kroah.com@vger.kernel.org Mon Nov  3 08:27:43 2025
-From: Sasha Levin <sashal@kernel.org>
-Date: Sun,  2 Nov 2025 18:27:32 -0500
-Subject: mptcp: move the whole rx path under msk socket lock protection
-To: stable@vger.kernel.org
-Cc: Paolo Abeni <pabeni@redhat.com>, Mat Martineau <martineau@kernel.org>, "Matthieu Baerts (NGI0)" <matttbe@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Sasha Levin <sashal@kernel.org>
-Message-ID: <20251102232735.3652847-1-sashal@kernel.org>
-
-From: Paolo Abeni <pabeni@redhat.com>
-
-[ Upstream commit bc68b0efa1bf923cef1294a631d8e7416c7e06e4 ]
-
-After commit c2e6048fa1cf ("mptcp: fix race in release_cb") we can
-move the whole MPTCP rx path under the socket lock leveraging the
-release_cb.
-
-We can drop a bunch of spin_lock pairs in the receive functions, use
-a single receive queue and invoke __mptcp_move_skbs only when subflows
-ask for it.
-
-This will allow more cleanup in the next patch.
-
-Some changes are worth specific mention:
-
-The msk rcvbuf update now always happens under both the msk and the
-subflow socket lock: we can drop a bunch of ONCE annotation and
-consolidate the checks.
-
-When the skbs move is delayed at msk release callback time, even the
-msk rcvbuf update is delayed; additionally take care of such action in
-__mptcp_move_skbs().
-
-Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-Reviewed-by: Mat Martineau <martineau@kernel.org>
-Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
-Link: https://patch.msgid.link/20250218-net-next-mptcp-rx-path-refactor-v1-3-4a47d90d7998@kernel.org
-Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- net/mptcp/fastopen.c |    1 
- net/mptcp/protocol.c |  123 ++++++++++++++++++++++++---------------------------
- net/mptcp/protocol.h |    2 
- 3 files changed, 60 insertions(+), 66 deletions(-)
-
---- a/net/mptcp/fastopen.c
-+++ b/net/mptcp/fastopen.c
-@@ -49,6 +49,7 @@ void mptcp_fastopen_subflow_synack_set_p
-       MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
-       mptcp_data_lock(sk);
-+      DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
-       mptcp_set_owner_r(skb, sk);
-       __skb_queue_tail(&sk->sk_receive_queue, skb);
---- a/net/mptcp/protocol.c
-+++ b/net/mptcp/protocol.c
-@@ -658,18 +658,6 @@ static bool __mptcp_move_skbs_from_subfl
-       bool more_data_avail;
-       struct tcp_sock *tp;
-       bool done = false;
--      int sk_rbuf;
--
--      sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
--
--      if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
--              int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
--
--              if (unlikely(ssk_rbuf > sk_rbuf)) {
--                      WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf);
--                      sk_rbuf = ssk_rbuf;
--              }
--      }
-       pr_debug("msk=%p ssk=%p\n", msk, ssk);
-       tp = tcp_sk(ssk);
-@@ -737,7 +725,7 @@ static bool __mptcp_move_skbs_from_subfl
-               WRITE_ONCE(tp->copied_seq, seq);
-               more_data_avail = mptcp_subflow_data_available(ssk);
--              if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) {
-+              if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-                       done = true;
-                       break;
-               }
-@@ -861,11 +849,30 @@ static bool move_skbs_to_msk(struct mptc
-       return moved > 0;
- }
-+static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk)
-+{
-+      if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf))
-+              WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf);
-+}
-+
-+static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
-+{
-+      struct mptcp_sock *msk = mptcp_sk(sk);
-+
-+      __mptcp_rcvbuf_update(sk, ssk);
-+
-+      /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
-+      if (__mptcp_rmem(sk) > sk->sk_rcvbuf)
-+              return;
-+
-+      /* Wake-up the reader only for in-sequence data */
-+      if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
-+              sk->sk_data_ready(sk);
-+}
-+
- void mptcp_data_ready(struct sock *sk, struct sock *ssk)
- {
-       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
--      struct mptcp_sock *msk = mptcp_sk(sk);
--      int sk_rbuf, ssk_rbuf;
-       /* The peer can send data while we are shutting down this
-        * subflow at msk destruction time, but we must avoid enqueuing
-@@ -874,19 +881,11 @@ void mptcp_data_ready(struct sock *sk, s
-       if (unlikely(subflow->disposable))
-               return;
--      ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
--      sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
--      if (unlikely(ssk_rbuf > sk_rbuf))
--              sk_rbuf = ssk_rbuf;
--
--      /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
--      if (__mptcp_rmem(sk) > sk_rbuf)
--              return;
--
--      /* Wake-up the reader only for in-sequence data */
-       mptcp_data_lock(sk);
--      if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
--              sk->sk_data_ready(sk);
-+      if (!sock_owned_by_user(sk))
-+              __mptcp_data_ready(sk, ssk);
-+      else
-+              __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags);
-       mptcp_data_unlock(sk);
- }
-@@ -1975,16 +1974,17 @@ do_error:
- static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
--static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
-+static int __mptcp_recvmsg_mskq(struct sock *sk,
-                               struct msghdr *msg,
-                               size_t len, int flags,
-                               struct scm_timestamping_internal *tss,
-                               int *cmsg_flags)
- {
-+      struct mptcp_sock *msk = mptcp_sk(sk);
-       struct sk_buff *skb, *tmp;
-       int copied = 0;
--      skb_queue_walk_safe(&msk->receive_queue, skb, tmp) {
-+      skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
-               u32 offset = MPTCP_SKB_CB(skb)->offset;
-               u32 data_len = skb->len - offset;
-               u32 count = min_t(size_t, len - copied, data_len);
-@@ -2019,7 +2019,7 @@ static int __mptcp_recvmsg_mskq(struct m
-                       /* we will bulk release the skb memory later */
-                       skb->destructor = NULL;
-                       WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
--                      __skb_unlink(skb, &msk->receive_queue);
-+                      __skb_unlink(skb, &sk->sk_receive_queue);
-                       __kfree_skb(skb);
-                       msk->bytes_consumed += count;
-               }
-@@ -2144,54 +2144,46 @@ static void __mptcp_update_rmem(struct s
-       WRITE_ONCE(msk->rmem_released, 0);
- }
--static void __mptcp_splice_receive_queue(struct sock *sk)
-+static bool __mptcp_move_skbs(struct sock *sk)
- {
-+      struct mptcp_subflow_context *subflow;
-       struct mptcp_sock *msk = mptcp_sk(sk);
--
--      skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
--}
--
--static bool __mptcp_move_skbs(struct mptcp_sock *msk)
--{
--      struct sock *sk = (struct sock *)msk;
-       unsigned int moved = 0;
-       bool ret, done;
-+      /* verify we can move any data from the subflow, eventually updating */
-+      if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
-+              mptcp_for_each_subflow(msk, subflow)
-+                      __mptcp_rcvbuf_update(sk, subflow->tcp_sock);
-+
-+      if (__mptcp_rmem(sk) > sk->sk_rcvbuf)
-+              return false;
-+
-       do {
-               struct sock *ssk = mptcp_subflow_recv_lookup(msk);
-               bool slowpath;
--              /* we can have data pending in the subflows only if the msk
--               * receive buffer was full at subflow_data_ready() time,
--               * that is an unlikely slow path.
--               */
--              if (likely(!ssk))
-+              if (unlikely(!ssk))
-                       break;
-               slowpath = lock_sock_fast(ssk);
--              mptcp_data_lock(sk);
-               __mptcp_update_rmem(sk);
-               done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
--              mptcp_data_unlock(sk);
-               if (unlikely(ssk->sk_err))
-                       __mptcp_error_report(sk);
-               unlock_sock_fast(ssk, slowpath);
-       } while (!done);
--      /* acquire the data lock only if some input data is pending */
-       ret = moved > 0;
-       if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) ||
--          !skb_queue_empty_lockless(&sk->sk_receive_queue)) {
--              mptcp_data_lock(sk);
-+          !skb_queue_empty(&sk->sk_receive_queue)) {
-               __mptcp_update_rmem(sk);
-               ret |= __mptcp_ofo_queue(msk);
--              __mptcp_splice_receive_queue(sk);
--              mptcp_data_unlock(sk);
-       }
-       if (ret)
-               mptcp_check_data_fin((struct sock *)msk);
--      return !skb_queue_empty(&msk->receive_queue);
-+      return ret;
- }
- static unsigned int mptcp_inq_hint(const struct sock *sk)
-@@ -2199,7 +2191,7 @@ static unsigned int mptcp_inq_hint(const
-       const struct mptcp_sock *msk = mptcp_sk(sk);
-       const struct sk_buff *skb;
--      skb = skb_peek(&msk->receive_queue);
-+      skb = skb_peek(&sk->sk_receive_queue);
-       if (skb) {
-               u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
-@@ -2245,7 +2237,7 @@ static int mptcp_recvmsg(struct sock *sk
-       while (copied < len) {
-               int err, bytes_read;
--              bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
-+              bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
-               if (unlikely(bytes_read < 0)) {
-                       if (!copied)
-                               copied = bytes_read;
-@@ -2254,7 +2246,7 @@ static int mptcp_recvmsg(struct sock *sk
-               copied += bytes_read;
--              if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
-+              if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk))
-                       continue;
-               /* only the MPTCP socket status is relevant here. The exit
-@@ -2280,7 +2272,7 @@ static int mptcp_recvmsg(struct sock *sk
-                               /* race breaker: the shutdown could be after the
-                                * previous receive queue check
-                                */
--                              if (__mptcp_move_skbs(msk))
-+                              if (__mptcp_move_skbs(sk))
-                                       continue;
-                               break;
-                       }
-@@ -2324,9 +2316,8 @@ out_err:
-               }
-       }
--      pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
--               msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
--               skb_queue_empty(&msk->receive_queue), copied);
-+      pr_debug("msk=%p rx queue empty=%d copied=%d\n",
-+               msk, skb_queue_empty(&sk->sk_receive_queue), copied);
-       release_sock(sk);
-       return copied;
-@@ -2866,7 +2857,6 @@ static void __mptcp_init_sock(struct soc
-       INIT_LIST_HEAD(&msk->join_list);
-       INIT_LIST_HEAD(&msk->rtx_queue);
-       INIT_WORK(&msk->work, mptcp_worker);
--      __skb_queue_head_init(&msk->receive_queue);
-       msk->out_of_order_queue = RB_ROOT;
-       msk->first_pending = NULL;
-       WRITE_ONCE(msk->rmem_fwd_alloc, 0);
-@@ -3462,12 +3452,8 @@ void mptcp_destroy_common(struct mptcp_s
-       mptcp_for_each_subflow_safe(msk, subflow, tmp)
-               __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags);
--      /* move to sk_receive_queue, sk_stream_kill_queues will purge it */
--      mptcp_data_lock(sk);
--      skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
-       __skb_queue_purge(&sk->sk_receive_queue);
-       skb_rbtree_purge(&msk->out_of_order_queue);
--      mptcp_data_unlock(sk);
-       /* move all the rx fwd alloc into the sk_mem_reclaim_final in
-        * inet_sock_destruct() will dispose it
-@@ -3507,7 +3493,8 @@ void __mptcp_check_push(struct sock *sk,
- #define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
-                                     BIT(MPTCP_RETRANSMIT) | \
--                                    BIT(MPTCP_FLUSH_JOIN_LIST))
-+                                    BIT(MPTCP_FLUSH_JOIN_LIST) | \
-+                                    BIT(MPTCP_DEQUEUE))
- /* processes deferred events and flush wmem */
- static void mptcp_release_cb(struct sock *sk)
-@@ -3541,6 +3528,11 @@ static void mptcp_release_cb(struct sock
-                       __mptcp_push_pending(sk, 0);
-               if (flags & BIT(MPTCP_RETRANSMIT))
-                       __mptcp_retrans(sk);
-+              if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) {
-+                      /* notify ack seq update */
-+                      mptcp_cleanup_rbuf(msk, 0);
-+                      sk->sk_data_ready(sk);
-+              }
-               cond_resched();
-               spin_lock_bh(&sk->sk_lock.slock);
-@@ -3783,7 +3775,8 @@ static int mptcp_ioctl(struct sock *sk,
-                       return -EINVAL;
-               lock_sock(sk);
--              __mptcp_move_skbs(msk);
-+              if (__mptcp_move_skbs(sk))
-+                      mptcp_cleanup_rbuf(msk, 0);
-               *karg = mptcp_inq_hint(sk);
-               release_sock(sk);
-               break;
---- a/net/mptcp/protocol.h
-+++ b/net/mptcp/protocol.h
-@@ -124,6 +124,7 @@
- #define MPTCP_FLUSH_JOIN_LIST 5
- #define MPTCP_SYNC_STATE      6
- #define MPTCP_SYNC_SNDBUF     7
-+#define MPTCP_DEQUEUE         8
- struct mptcp_skb_cb {
-       u64 map_seq;
-@@ -324,7 +325,6 @@ struct mptcp_sock {
-       struct work_struct work;
-       struct sk_buff  *ooo_last_skb;
-       struct rb_root  out_of_order_queue;
--      struct sk_buff_head receive_queue;
-       struct list_head conn_list;
-       struct list_head rtx_queue;
-       struct mptcp_data_frag *first_pending;
diff --git a/queue-6.12/series b/queue-6.12/series
index c77e3861926b8c25b267a2c9a39bc4b5081619de..5d8c9f8a315bfe48bb40c901b28e05ae92a8a1f1 100644 (file)
@@ -77,8 +77,4 @@ net-phy-dp83867-disable-eee-support-as-not-implemented.patch
 sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch
 cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch
 cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch
-mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch
-mptcp-cleanup-mem-accounting.patch
-mptcp-leverage-skb-deferral-free.patch
-mptcp-fix-msg_peek-stream-corruption.patch
 mfd-kempld-switch-back-to-earlier-init-behavior.patch