From e9b7426efdbbb9f057ec5ab6a4f2c20d6c48c8d9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Nov 2025 11:48:55 +0900 Subject: [PATCH] drop mptcp patches from 6.12 --- queue-6.12/mptcp-cleanup-mem-accounting.patch | 298 --------------- ...mptcp-fix-msg_peek-stream-corruption.patch | 119 ------ .../mptcp-leverage-skb-deferral-free.patch | 50 --- ...ath-under-msk-socket-lock-protection.patch | 359 ------------------ queue-6.12/series | 4 - 5 files changed, 830 deletions(-) delete mode 100644 queue-6.12/mptcp-cleanup-mem-accounting.patch delete mode 100644 queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch delete mode 100644 queue-6.12/mptcp-leverage-skb-deferral-free.patch delete mode 100644 queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch diff --git a/queue-6.12/mptcp-cleanup-mem-accounting.patch b/queue-6.12/mptcp-cleanup-mem-accounting.patch deleted file mode 100644 index 6e1a8b47e6..0000000000 --- a/queue-6.12/mptcp-cleanup-mem-accounting.patch +++ /dev/null @@ -1,298 +0,0 @@ -From stable+bounces-192096-greg=kroah.com@vger.kernel.org Mon Nov 3 08:27:44 2025 -From: Sasha Levin -Date: Sun, 2 Nov 2025 18:27:33 -0500 -Subject: mptcp: cleanup mem accounting -To: stable@vger.kernel.org -Cc: Paolo Abeni , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin -Message-ID: <20251102232735.3652847-2-sashal@kernel.org> - -From: Paolo Abeni - -[ Upstream commit 6639498ed85fdb135dfb0dfbcc0f540b2d4ad6a6 ] - -After the previous patch, updating sk_forward_memory is cheap and -we can drop a lot of complexity from the MPTCP memory accounting, -removing the custom fwd mem allocations for rmem. - -Signed-off-by: Paolo Abeni -Reviewed-by: Mat Martineau -Signed-off-by: Matthieu Baerts (NGI0) -Link: https://patch.msgid.link/20250218-net-next-mptcp-rx-path-refactor-v1-4-4a47d90d7998@kernel.org -Signed-off-by: Jakub Kicinski -Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption") -Signed-off-by: Sasha Levin -Signed-off-by: Greg Kroah-Hartman ---- - net/mptcp/fastopen.c | 2 - net/mptcp/protocol.c | 115 +++------------------------------------------------ - net/mptcp/protocol.h | 4 - - 3 files changed, 10 insertions(+), 111 deletions(-) - ---- a/net/mptcp/fastopen.c -+++ b/net/mptcp/fastopen.c -@@ -51,7 +51,7 @@ void mptcp_fastopen_subflow_synack_set_p - mptcp_data_lock(sk); - DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk)); - -- mptcp_set_owner_r(skb, sk); -+ skb_set_owner_r(skb, sk); - __skb_queue_tail(&sk->sk_receive_queue, skb); - mptcp_sk(sk)->bytes_received += skb->len; - ---- a/net/mptcp/protocol.c -+++ b/net/mptcp/protocol.c -@@ -118,17 +118,6 @@ static void mptcp_drop(struct sock *sk, - __kfree_skb(skb); - } - --static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size) --{ -- WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc, -- mptcp_sk(sk)->rmem_fwd_alloc + size); --} -- --static void mptcp_rmem_charge(struct sock *sk, int size) --{ -- mptcp_rmem_fwd_alloc_add(sk, -size); --} -- - static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, - struct sk_buff *from) - { -@@ -150,7 +139,7 @@ static bool mptcp_try_coalesce(struct so - * negative one - */ - atomic_add(delta, &sk->sk_rmem_alloc); -- mptcp_rmem_charge(sk, delta); -+ sk_mem_charge(sk, delta); - kfree_skb_partial(from, fragstolen); - - return true; -@@ -165,44 +154,6 @@ static bool mptcp_ooo_try_coalesce(struc - return mptcp_try_coalesce((struct sock *)msk, to, from); - } - --static void __mptcp_rmem_reclaim(struct sock *sk, int amount) --{ -- amount >>= 
PAGE_SHIFT; -- mptcp_rmem_charge(sk, amount << PAGE_SHIFT); -- __sk_mem_reduce_allocated(sk, amount); --} -- --static void mptcp_rmem_uncharge(struct sock *sk, int size) --{ -- struct mptcp_sock *msk = mptcp_sk(sk); -- int reclaimable; -- -- mptcp_rmem_fwd_alloc_add(sk, size); -- reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); -- -- /* see sk_mem_uncharge() for the rationale behind the following schema */ -- if (unlikely(reclaimable >= PAGE_SIZE)) -- __mptcp_rmem_reclaim(sk, reclaimable); --} -- --static void mptcp_rfree(struct sk_buff *skb) --{ -- unsigned int len = skb->truesize; -- struct sock *sk = skb->sk; -- -- atomic_sub(len, &sk->sk_rmem_alloc); -- mptcp_rmem_uncharge(sk, len); --} -- --void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk) --{ -- skb_orphan(skb); -- skb->sk = sk; -- skb->destructor = mptcp_rfree; -- atomic_add(skb->truesize, &sk->sk_rmem_alloc); -- mptcp_rmem_charge(sk, skb->truesize); --} -- - /* "inspired" by tcp_data_queue_ofo(), main differences: - * - use mptcp seqs - * - don't cope with sacks -@@ -315,25 +266,7 @@ merge_right: - - end: - skb_condense(skb); -- mptcp_set_owner_r(skb, sk); --} -- --static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) --{ -- struct mptcp_sock *msk = mptcp_sk(sk); -- int amt, amount; -- -- if (size <= msk->rmem_fwd_alloc) -- return true; -- -- size -= msk->rmem_fwd_alloc; -- amt = sk_mem_pages(size); -- amount = amt << PAGE_SHIFT; -- if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) -- return false; -- -- mptcp_rmem_fwd_alloc_add(sk, amount); -- return true; -+ skb_set_owner_r(skb, sk); - } - - static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, -@@ -351,7 +284,7 @@ static bool __mptcp_move_skb(struct mptc - skb_orphan(skb); - - /* try to fetch required memory from subflow */ -- if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { -+ if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); - goto drop; - } -@@ -375,7 +308,7 @@ static bool __mptcp_move_skb(struct mptc - if (tail && mptcp_try_coalesce(sk, tail, skb)) - return true; - -- mptcp_set_owner_r(skb, sk); -+ skb_set_owner_r(skb, sk); - __skb_queue_tail(&sk->sk_receive_queue, skb); - return true; - } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) { -@@ -2016,9 +1949,10 @@ static int __mptcp_recvmsg_mskq(struct s - } - - if (!(flags & MSG_PEEK)) { -- /* we will bulk release the skb memory later */ -+ /* avoid the indirect call, we know the destructor is sock_wfree */ - skb->destructor = NULL; -- WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); -+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc); -+ sk_mem_uncharge(sk, skb->truesize); - __skb_unlink(skb, &sk->sk_receive_queue); - __kfree_skb(skb); - msk->bytes_consumed += count; -@@ -2132,18 +2066,6 @@ new_measure: - msk->rcvq_space.time = mstamp; - } - --static void __mptcp_update_rmem(struct sock *sk) --{ -- struct mptcp_sock *msk = mptcp_sk(sk); -- -- if (!msk->rmem_released) -- return; -- -- atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc); -- mptcp_rmem_uncharge(sk, msk->rmem_released); -- WRITE_ONCE(msk->rmem_released, 0); --} -- - static bool __mptcp_move_skbs(struct sock *sk) - { - struct mptcp_subflow_context *subflow; -@@ -2167,7 +2089,6 @@ static bool __mptcp_move_skbs(struct soc - break; - - slowpath = lock_sock_fast(ssk); -- __mptcp_update_rmem(sk); - done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); - - if (unlikely(ssk->sk_err)) -@@ -2175,12 +2096,7 @@ 
static bool __mptcp_move_skbs(struct soc - unlock_sock_fast(ssk, slowpath); - } while (!done); - -- ret = moved > 0; -- if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) || -- !skb_queue_empty(&sk->sk_receive_queue)) { -- __mptcp_update_rmem(sk); -- ret |= __mptcp_ofo_queue(msk); -- } -+ ret = moved > 0 || __mptcp_ofo_queue(msk); - if (ret) - mptcp_check_data_fin((struct sock *)msk); - return ret; -@@ -2859,8 +2775,6 @@ static void __mptcp_init_sock(struct soc - INIT_WORK(&msk->work, mptcp_worker); - msk->out_of_order_queue = RB_ROOT; - msk->first_pending = NULL; -- WRITE_ONCE(msk->rmem_fwd_alloc, 0); -- WRITE_ONCE(msk->rmem_released, 0); - msk->timer_ival = TCP_RTO_MIN; - msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; - -@@ -3088,8 +3002,6 @@ static void __mptcp_destroy_sock(struct - - sk->sk_prot->destroy(sk); - -- WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc)); -- WARN_ON_ONCE(msk->rmem_released); - sk_stream_kill_queues(sk); - xfrm_sk_free_policy(sk); - -@@ -3458,8 +3370,6 @@ void mptcp_destroy_common(struct mptcp_s - /* move all the rx fwd alloc into the sk_mem_reclaim_final in - * inet_sock_destruct() will dispose it - */ -- sk_forward_alloc_add(sk, msk->rmem_fwd_alloc); -- WRITE_ONCE(msk->rmem_fwd_alloc, 0); - mptcp_token_destroy(msk); - mptcp_pm_free_anno_list(msk); - mptcp_free_local_addr_list(msk); -@@ -3552,8 +3462,6 @@ static void mptcp_release_cb(struct sock - if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) - __mptcp_sync_sndbuf(sk); - } -- -- __mptcp_update_rmem(sk); - } - - /* MP_JOIN client subflow must wait for 4th ack before sending any data: -@@ -3729,12 +3637,6 @@ static void mptcp_shutdown(struct sock * - __mptcp_wr_shutdown(sk); - } - --static int mptcp_forward_alloc_get(const struct sock *sk) --{ -- return READ_ONCE(sk->sk_forward_alloc) + -- READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc); --} -- - static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) - { - const struct sock *sk = (void *)msk; -@@ -3893,7 +3795,6 @@ static struct proto mptcp_prot = { - .hash = mptcp_hash, - .unhash = mptcp_unhash, - .get_port = mptcp_get_port, -- .forward_alloc_get = mptcp_forward_alloc_get, - .stream_memory_free = mptcp_stream_memory_free, - .sockets_allocated = &mptcp_sockets_allocated, - ---- a/net/mptcp/protocol.h -+++ b/net/mptcp/protocol.h -@@ -280,7 +280,6 @@ struct mptcp_sock { - u64 rcv_data_fin_seq; - u64 bytes_retrans; - u64 bytes_consumed; -- int rmem_fwd_alloc; - int snd_burst; - int old_wspace; - u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; -@@ -295,7 +294,6 @@ struct mptcp_sock { - u32 last_ack_recv; - unsigned long timer_ival; - u32 token; -- int rmem_released; - unsigned long flags; - unsigned long cb_flags; - bool recovery; /* closing subflow write queue reinjected */ -@@ -392,7 +390,7 @@ static inline void msk_owned_by_me(const - */ - static inline int __mptcp_rmem(const struct sock *sk) - { -- return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released); -+ return atomic_read(&sk->sk_rmem_alloc); - } - - static inline int mptcp_win_from_space(const struct sock *sk, int space) diff --git a/queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch b/queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch deleted file mode 100644 index 9f247ab561..0000000000 --- a/queue-6.12/mptcp-fix-msg_peek-stream-corruption.patch +++ /dev/null @@ -1,119 +0,0 @@ -From stable+bounces-192098-greg=kroah.com@vger.kernel.org Mon Nov 3 08:27:48 2025 -From: Sasha Levin -Date: Sun, 2 Nov 2025 18:27:35 -0500 -Subject: mptcp: fix MSG_PEEK stream 
corruption
-To: stable@vger.kernel.org
-Cc: Paolo Abeni , Geliang Tang , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin
-Message-ID: <20251102232735.3652847-4-sashal@kernel.org>
-
-From: Paolo Abeni
-
-[ Upstream commit 8e04ce45a8db7a080220e86e249198fa676b83dc ]
-
-If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the
-receive queue and recvmsg() needs to wait for more data - i.e. it's a
-blocking one - upon arrival of the next packet the MPTCP protocol will
-start again copying the oldest data present in the receive queue,
-corrupting the data stream.
-
-Address the issue by explicitly tracking the peeked sequence number,
-restarting from the last peeked byte.
-
-Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support")
-Cc: stable@vger.kernel.org
-Signed-off-by: Paolo Abeni
-Reviewed-by: Geliang Tang
-Tested-by: Geliang Tang
-Reviewed-by: Mat Martineau
-Signed-off-by: Matthieu Baerts (NGI0)
-Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-2-38ffff5a9ec8@kernel.org
-Signed-off-by: Jakub Kicinski
-Signed-off-by: Sasha Levin
-Signed-off-by: Greg Kroah-Hartman
---
- net/mptcp/protocol.c | 38 +++++++++++++++++++++++++-------------
- 1 file changed, 25 insertions(+), 13 deletions(-)
-
---- a/net/mptcp/protocol.c
-+++ b/net/mptcp/protocol.c
-@@ -1907,22 +1907,36 @@ do_error:
- 
- static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
- 
--static int __mptcp_recvmsg_mskq(struct sock *sk,
-- struct msghdr *msg,
-- size_t len, int flags,
-+static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
-+ size_t len, int flags, int copied_total,
- struct scm_timestamping_internal *tss,
- int *cmsg_flags)
- {
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct sk_buff *skb, *tmp;
-+ int total_data_len = 0;
- int copied = 0;
- 
- skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
-- u32 offset = MPTCP_SKB_CB(skb)->offset;
-+ u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
- u32 data_len = skb->len - offset;
-- u32 count = min_t(size_t, len - copied, data_len);
-+ u32 count;
- int err;
- 
-+ if (flags & MSG_PEEK) {
-+ /* skip already peeked skbs */
-+ if (total_data_len + data_len <= copied_total) {
-+ total_data_len += data_len;
-+ continue;
-+ }
-+
-+ /* skip the already peeked data in the current skb */
-+ delta = copied_total - total_data_len;
-+ offset += delta;
-+ data_len -= delta;
-+ }
-+
-+ count = min_t(size_t, len - copied, data_len);
- if (!(flags & MSG_TRUNC)) {
- err = skb_copy_datagram_msg(skb, offset, msg, count);
- if (unlikely(err < 0)) {
-@@ -1939,16 +1953,14 @@ static int __mptcp_recvmsg_mskq(struct s
- 
- copied += count;
- 
-- if (count < data_len) {
-- if (!(flags & MSG_PEEK)) {
-+ if (!(flags & MSG_PEEK)) {
-+ msk->bytes_consumed += count;
-+ if (count < data_len) {
- MPTCP_SKB_CB(skb)->offset += count;
- MPTCP_SKB_CB(skb)->map_seq += count;
-- msk->bytes_consumed += count;
-+ break;
- }
-- break;
-- }
- 
-- if (!(flags & MSG_PEEK)) {
- /* avoid the indirect call, we know the destructor is sock_rfree */
- skb->destructor = NULL;
- skb->sk = NULL;
-@@ -1956,7 +1968,6 @@ static int __mptcp_recvmsg_mskq(struct s
- sk_mem_uncharge(sk, skb->truesize);
- __skb_unlink(skb, &sk->sk_receive_queue);
- skb_attempt_defer_free(skb);
-- msk->bytes_consumed += count;
- }
- 
- if (copied >= len)
-@@ -2154,7 +2165,8 @@ static int mptcp_recvmsg(struct sock *sk
- while (copied < len) {
- int err, bytes_read;
- 
-- bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
-+ bytes_read = 
__mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
-+ copied, &tss, &cmsg_flags);
- if (unlikely(bytes_read < 0)) {
- if (!copied)
- copied = bytes_read;
diff --git a/queue-6.12/mptcp-leverage-skb-deferral-free.patch b/queue-6.12/mptcp-leverage-skb-deferral-free.patch
deleted file mode 100644
index a099c64a93..0000000000
--- a/queue-6.12/mptcp-leverage-skb-deferral-free.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From stable+bounces-192097-greg=kroah.com@vger.kernel.org Mon Nov 3 08:27:49 2025
-From: Sasha Levin
-Date: Sun, 2 Nov 2025 18:27:34 -0500
-Subject: mptcp: leverage skb deferral free
-To: stable@vger.kernel.org
-Cc: Paolo Abeni , Geliang Tang , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin
-Message-ID: <20251102232735.3652847-3-sashal@kernel.org>
-
-From: Paolo Abeni
-
-[ Upstream commit 9aa59323f2709370cb4f01acbba599a9167f317b ]
-
-Usage of the skb deferral API is straightforward; with multiple
-active subflows this allows moving part of the received application
-load across multiple CPUs.
-
-Also fix a typo in the related comment.
-
-Reviewed-by: Geliang Tang
-Tested-by: Geliang Tang
-Reviewed-by: Matthieu Baerts (NGI0)
-Signed-off-by: Paolo Abeni
-Signed-off-by: Matthieu Baerts (NGI0)
-Link: https://patch.msgid.link/20250927-net-next-mptcp-rcv-path-imp-v1-1-5da266aa9c1a@kernel.org
-Signed-off-by: Jakub Kicinski
-Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption")
-Signed-off-by: Sasha Levin
-Signed-off-by: Greg Kroah-Hartman
---
- net/mptcp/protocol.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
---- a/net/mptcp/protocol.c
-+++ b/net/mptcp/protocol.c
-@@ -1949,12 +1949,13 @@ static int __mptcp_recvmsg_mskq(struct s
- }
- 
- if (!(flags & MSG_PEEK)) {
-- /* avoid the indirect call, we know the destructor is sock_wfree */
-+ /* avoid the indirect call, we know the destructor is sock_rfree */
- skb->destructor = NULL;
-+ skb->sk = NULL;
- atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
- sk_mem_uncharge(sk, skb->truesize);
- __skb_unlink(skb, &sk->sk_receive_queue);
-- __kfree_skb(skb);
-+ skb_attempt_defer_free(skb);
- msk->bytes_consumed += count;
- }
- 
diff --git a/queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch b/queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch
deleted file mode 100644
index d6d527201a..0000000000
--- a/queue-6.12/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch
+++ /dev/null
@@ -1,359 +0,0 @@
-From stable+bounces-192095-greg=kroah.com@vger.kernel.org Mon Nov 3 08:27:43 2025
-From: Sasha Levin
-Date: Sun, 2 Nov 2025 18:27:32 -0500
-Subject: mptcp: move the whole rx path under msk socket lock protection
-To: stable@vger.kernel.org
-Cc: Paolo Abeni , Mat Martineau , "Matthieu Baerts (NGI0)" , Jakub Kicinski , Sasha Levin
-Message-ID: <20251102232735.3652847-1-sashal@kernel.org>
-
-From: Paolo Abeni
-
-[ Upstream commit bc68b0efa1bf923cef1294a631d8e7416c7e06e4 ]
-
-After commit c2e6048fa1cf ("mptcp: fix race in release_cb") we can
-move the whole MPTCP rx path under the socket lock leveraging the
-release_cb.
-
-We can drop a bunch of spin_lock pairs in the receive functions, use
-a single receive queue and invoke __mptcp_move_skbs only when subflows
-ask for it.
-
-This will allow more cleanup in the next patch.
-
-Some changes are worth specific mention:
-
-The msk rcvbuf update now always happens under both the msk and the
-subflow socket lock: we can drop a bunch of ONCE annotations and
-consolidate the checks. 
- 
-When the skb move is deferred to the msk release callback, the msk
-rcvbuf update is deferred as well; __mptcp_move_skbs() additionally
-takes care of that.
-
-Signed-off-by: Paolo Abeni
-Reviewed-by: Mat Martineau
-Signed-off-by: Matthieu Baerts (NGI0)
-Link: https://patch.msgid.link/20250218-net-next-mptcp-rx-path-refactor-v1-3-4a47d90d7998@kernel.org
-Signed-off-by: Jakub Kicinski
-Stable-dep-of: 8e04ce45a8db ("mptcp: fix MSG_PEEK stream corruption")
-Signed-off-by: Sasha Levin
-Signed-off-by: Greg Kroah-Hartman
---
- net/mptcp/fastopen.c | 1 
- net/mptcp/protocol.c | 123 ++++++++++++++++++++++++---------------------------
- net/mptcp/protocol.h | 2 
- 3 files changed, 60 insertions(+), 66 deletions(-)
-
---- a/net/mptcp/fastopen.c
-+++ b/net/mptcp/fastopen.c
-@@ -49,6 +49,7 @@ void mptcp_fastopen_subflow_synack_set_p
- MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
- 
- mptcp_data_lock(sk);
-+ DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
- 
- mptcp_set_owner_r(skb, sk);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
---- a/net/mptcp/protocol.c
-+++ b/net/mptcp/protocol.c
-@@ -658,18 +658,6 @@ static bool __mptcp_move_skbs_from_subfl
- bool more_data_avail;
- struct tcp_sock *tp;
- bool done = false;
-- int sk_rbuf;
--
-- sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
--
-- if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-- int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
--
-- if (unlikely(ssk_rbuf > sk_rbuf)) {
-- WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf);
-- sk_rbuf = ssk_rbuf;
-- }
-- }
- 
- pr_debug("msk=%p ssk=%p\n", msk, ssk);
- tp = tcp_sk(ssk);
-@@ -737,7 +725,7 @@ static bool __mptcp_move_skbs_from_subfl
- WRITE_ONCE(tp->copied_seq, seq);
- more_data_avail = mptcp_subflow_data_available(ssk);
- 
-- if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) {
-+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
- done = true;
- break;
- }
-@@ -861,11 +849,30 @@ static bool move_skbs_to_msk(struct mptc
- return moved > 0;
- }
- 
-+static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk)
-+{
-+ if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf))
-+ WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf);
-+}
-+
-+static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
-+{
-+ struct mptcp_sock *msk = mptcp_sk(sk);
-+
-+ __mptcp_rcvbuf_update(sk, ssk);
-+
-+ /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
-+ if (__mptcp_rmem(sk) > sk->sk_rcvbuf)
-+ return;
-+
-+ /* Wake-up the reader only for in-sequence data */
-+ if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
-+ sk->sk_data_ready(sk);
-+}
-+
- void mptcp_data_ready(struct sock *sk, struct sock *ssk)
- {
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
-- struct mptcp_sock *msk = mptcp_sk(sk);
-- int sk_rbuf, ssk_rbuf;
- 
- /* The peer can send data while we are shutting down this
- * subflow at msk destruction time, but we must avoid enqueuing
-@@ -874,19 +881,11 @@ void mptcp_data_ready(struct sock *sk, s
- if (unlikely(subflow->disposable))
- return;
- 
-- ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
-- sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
-- if (unlikely(ssk_rbuf > sk_rbuf))
-- sk_rbuf = ssk_rbuf;
--
-- /* over limit? 
can't append more skbs to msk, Also, no need to wake-up*/ -- if (__mptcp_rmem(sk) > sk_rbuf) -- return; -- -- /* Wake-up the reader only for in-sequence data */ - mptcp_data_lock(sk); -- if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) -- sk->sk_data_ready(sk); -+ if (!sock_owned_by_user(sk)) -+ __mptcp_data_ready(sk, ssk); -+ else -+ __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags); - mptcp_data_unlock(sk); - } - -@@ -1975,16 +1974,17 @@ do_error: - - static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); - --static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, -+static int __mptcp_recvmsg_mskq(struct sock *sk, - struct msghdr *msg, - size_t len, int flags, - struct scm_timestamping_internal *tss, - int *cmsg_flags) - { -+ struct mptcp_sock *msk = mptcp_sk(sk); - struct sk_buff *skb, *tmp; - int copied = 0; - -- skb_queue_walk_safe(&msk->receive_queue, skb, tmp) { -+ skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { - u32 offset = MPTCP_SKB_CB(skb)->offset; - u32 data_len = skb->len - offset; - u32 count = min_t(size_t, len - copied, data_len); -@@ -2019,7 +2019,7 @@ static int __mptcp_recvmsg_mskq(struct m - /* we will bulk release the skb memory later */ - skb->destructor = NULL; - WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); -- __skb_unlink(skb, &msk->receive_queue); -+ __skb_unlink(skb, &sk->sk_receive_queue); - __kfree_skb(skb); - msk->bytes_consumed += count; - } -@@ -2144,54 +2144,46 @@ static void __mptcp_update_rmem(struct s - WRITE_ONCE(msk->rmem_released, 0); - } - --static void __mptcp_splice_receive_queue(struct sock *sk) -+static bool __mptcp_move_skbs(struct sock *sk) - { -+ struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); -- -- skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue); --} -- --static bool __mptcp_move_skbs(struct mptcp_sock *msk) --{ -- struct sock *sk = (struct sock *)msk; - unsigned int moved = 0; - bool ret, done; - -+ /* verify we can move any data from the subflow, eventually updating */ -+ if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) -+ mptcp_for_each_subflow(msk, subflow) -+ __mptcp_rcvbuf_update(sk, subflow->tcp_sock); -+ -+ if (__mptcp_rmem(sk) > sk->sk_rcvbuf) -+ return false; -+ - do { - struct sock *ssk = mptcp_subflow_recv_lookup(msk); - bool slowpath; - -- /* we can have data pending in the subflows only if the msk -- * receive buffer was full at subflow_data_ready() time, -- * that is an unlikely slow path. 
-- */ -- if (likely(!ssk)) -+ if (unlikely(!ssk)) - break; - - slowpath = lock_sock_fast(ssk); -- mptcp_data_lock(sk); - __mptcp_update_rmem(sk); - done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); -- mptcp_data_unlock(sk); - - if (unlikely(ssk->sk_err)) - __mptcp_error_report(sk); - unlock_sock_fast(ssk, slowpath); - } while (!done); - -- /* acquire the data lock only if some input data is pending */ - ret = moved > 0; - if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) || -- !skb_queue_empty_lockless(&sk->sk_receive_queue)) { -- mptcp_data_lock(sk); -+ !skb_queue_empty(&sk->sk_receive_queue)) { - __mptcp_update_rmem(sk); - ret |= __mptcp_ofo_queue(msk); -- __mptcp_splice_receive_queue(sk); -- mptcp_data_unlock(sk); - } - if (ret) - mptcp_check_data_fin((struct sock *)msk); -- return !skb_queue_empty(&msk->receive_queue); -+ return ret; - } - - static unsigned int mptcp_inq_hint(const struct sock *sk) -@@ -2199,7 +2191,7 @@ static unsigned int mptcp_inq_hint(const - const struct mptcp_sock *msk = mptcp_sk(sk); - const struct sk_buff *skb; - -- skb = skb_peek(&msk->receive_queue); -+ skb = skb_peek(&sk->sk_receive_queue); - if (skb) { - u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq; - -@@ -2245,7 +2237,7 @@ static int mptcp_recvmsg(struct sock *sk - while (copied < len) { - int err, bytes_read; - -- bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); -+ bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); - if (unlikely(bytes_read < 0)) { - if (!copied) - copied = bytes_read; -@@ -2254,7 +2246,7 @@ static int mptcp_recvmsg(struct sock *sk - - copied += bytes_read; - -- if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) -+ if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk)) - continue; - - /* only the MPTCP socket status is relevant here. 
The exit -@@ -2280,7 +2272,7 @@ static int mptcp_recvmsg(struct sock *sk - /* race breaker: the shutdown could be after the - * previous receive queue check - */ -- if (__mptcp_move_skbs(msk)) -+ if (__mptcp_move_skbs(sk)) - continue; - break; - } -@@ -2324,9 +2316,8 @@ out_err: - } - } - -- pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n", -- msk, skb_queue_empty_lockless(&sk->sk_receive_queue), -- skb_queue_empty(&msk->receive_queue), copied); -+ pr_debug("msk=%p rx queue empty=%d copied=%d\n", -+ msk, skb_queue_empty(&sk->sk_receive_queue), copied); - - release_sock(sk); - return copied; -@@ -2866,7 +2857,6 @@ static void __mptcp_init_sock(struct soc - INIT_LIST_HEAD(&msk->join_list); - INIT_LIST_HEAD(&msk->rtx_queue); - INIT_WORK(&msk->work, mptcp_worker); -- __skb_queue_head_init(&msk->receive_queue); - msk->out_of_order_queue = RB_ROOT; - msk->first_pending = NULL; - WRITE_ONCE(msk->rmem_fwd_alloc, 0); -@@ -3462,12 +3452,8 @@ void mptcp_destroy_common(struct mptcp_s - mptcp_for_each_subflow_safe(msk, subflow, tmp) - __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags); - -- /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ -- mptcp_data_lock(sk); -- skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); - __skb_queue_purge(&sk->sk_receive_queue); - skb_rbtree_purge(&msk->out_of_order_queue); -- mptcp_data_unlock(sk); - - /* move all the rx fwd alloc into the sk_mem_reclaim_final in - * inet_sock_destruct() will dispose it -@@ -3507,7 +3493,8 @@ void __mptcp_check_push(struct sock *sk, - - #define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \ - BIT(MPTCP_RETRANSMIT) | \ -- BIT(MPTCP_FLUSH_JOIN_LIST)) -+ BIT(MPTCP_FLUSH_JOIN_LIST) | \ -+ BIT(MPTCP_DEQUEUE)) - - /* processes deferred events and flush wmem */ - static void mptcp_release_cb(struct sock *sk) -@@ -3541,6 +3528,11 @@ static void mptcp_release_cb(struct sock - __mptcp_push_pending(sk, 0); - if (flags & BIT(MPTCP_RETRANSMIT)) - __mptcp_retrans(sk); -+ if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) { -+ /* notify ack seq update */ -+ mptcp_cleanup_rbuf(msk, 0); -+ sk->sk_data_ready(sk); -+ } - - cond_resched(); - spin_lock_bh(&sk->sk_lock.slock); -@@ -3783,7 +3775,8 @@ static int mptcp_ioctl(struct sock *sk, - return -EINVAL; - - lock_sock(sk); -- __mptcp_move_skbs(msk); -+ if (__mptcp_move_skbs(sk)) -+ mptcp_cleanup_rbuf(msk, 0); - *karg = mptcp_inq_hint(sk); - release_sock(sk); - break; ---- a/net/mptcp/protocol.h -+++ b/net/mptcp/protocol.h -@@ -124,6 +124,7 @@ - #define MPTCP_FLUSH_JOIN_LIST 5 - #define MPTCP_SYNC_STATE 6 - #define MPTCP_SYNC_SNDBUF 7 -+#define MPTCP_DEQUEUE 8 - - struct mptcp_skb_cb { - u64 map_seq; -@@ -324,7 +325,6 @@ struct mptcp_sock { - struct work_struct work; - struct sk_buff *ooo_last_skb; - struct rb_root out_of_order_queue; -- struct sk_buff_head receive_queue; - struct list_head conn_list; - struct list_head rtx_queue; - struct mptcp_data_frag *first_pending; diff --git a/queue-6.12/series b/queue-6.12/series index c77e386192..5d8c9f8a31 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -77,8 +77,4 @@ net-phy-dp83867-disable-eee-support-as-not-implemented.patch sched_ext-mark-scx_bpf_dsq_move_set_-with-kf_rcu.patch cpuidle-governors-menu-rearrange-main-loop-in-menu_select.patch cpuidle-governors-menu-select-polling-state-in-some-more-cases.patch -mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection.patch -mptcp-cleanup-mem-accounting.patch -mptcp-leverage-skb-deferral-free.patch 
-mptcp-fix-msg_peek-stream-corruption.patch mfd-kempld-switch-back-to-earlier-init-behavior.patch -- 2.47.3
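
For context: the MSG_PEEK fix dropped above addresses a userspace-visible
pattern where a blocking recv() with MSG_PEEK | MSG_WAITALL drains the
receive queue and then waits; on affected kernels the data peeked after the
wait could restart from the oldest bytes still queued. A minimal sketch of
that pattern, assuming a local MPTCP-capable listener on 127.0.0.1:8080
(address, port and buffer size are illustrative only):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262	/* from linux/in.h, for older userspace headers */
#endif

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),	/* hypothetical server */
	};
	char peeked[4096], readout[4096];
	ssize_t p, r;
	int fd;

	inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);
	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* blocking peek: may consume the whole receive queue, then wait
	 * for more data to arrive before returning
	 */
	p = recv(fd, peeked, sizeof(peeked), MSG_PEEK | MSG_WAITALL);

	/* consuming read of the same range; with the bug, the peeked copy
	 * could contain repeated bytes, so the two buffers may differ
	 */
	r = recv(fd, readout, sizeof(readout), MSG_WAITALL);

	if (p > 0 && p == r && !memcmp(peeked, readout, p))
		puts("peek matches read");
	else
		puts("stream mismatch");

	close(fd);
	return 0;
}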