git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
mptcp: avoid unneeded subflow-level drops
author Paolo Abeni <pabeni@redhat.com>
Tue, 18 Nov 2025 07:20:20 +0000 (08:20 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 1 Dec 2025 10:43:26 +0000 (11:43 +0100)
commit 4f102d747cadd8f595f2b25882eed9bec1675fb1 upstream.

The rcv window is shared among all the subflows. Currently, MPTCP syncs
the TCP-level rcv window with the MPTCP one at tcp_transmit_skb() time.

The above means that incoming data may sporadically observe an outdated
TCP-level rcv window and be wrongly dropped by TCP.

Address the issue by checking for the edge condition before queuing the
data at the TCP level, and syncing the rcv window if needed.

Note that the issue is actually present from the very first MPTCP
implementation, but backports older than the blamed commit below will
range from impossible to useless.

Before:

  $ nstat -n; sleep 1; nstat -z TcpExtBeyondWindow
  TcpExtBeyondWindow              14                 0.0

After:

  $ nstat -n; sleep 1; nstat -z TcpExtBeyondWindow
  TcpExtBeyondWindow              0                  0.0

Fixes: fa3fe2b15031 ("mptcp: track window announced to peer")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-2-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
net/mptcp/options.c
net/mptcp/protocol.h

index fa50c0a7ff5413a548e2c7de30e28c168c5d77bf..bc089388530b8f52639ac6005b7a77b8529882aa 100644 (file)
@@ -1044,6 +1044,31 @@ static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
        WRITE_ONCE(msk->snd_una, new_snd_una);
 }
 
+static void rwin_update(struct mptcp_sock *msk, struct sock *ssk,
+                       struct sk_buff *skb)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+       struct tcp_sock *tp = tcp_sk(ssk);
+       u64 mptcp_rcv_wnd;
+
+       /* Fast path, avoids touching extra cachelines: an empty skb, or one
+        * shorter than the current TCP-level receive window, is accepted by
+        * TCP even if that window lags behind the mptcp-level rwin.
+        */
+       if (!skb->len || skb->len < tcp_receive_window(tp))
+               return;
+
+       mptcp_rcv_wnd = atomic64_read(&msk->rcv_wnd_sent);
+       if (!after64(mptcp_rcv_wnd, subflow->rcv_wnd_sent))
+               return;
+
+       /* The mptcp-level rwin moved past the value last synced on this
+        * subflow (another subflow grew it): extend rcv_wnd by the delta.
+        */
+       tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent;
+       subflow->rcv_wnd_sent = mptcp_rcv_wnd;
+}
+
 static void ack_update_msk(struct mptcp_sock *msk,
                           struct sock *ssk,
                           struct mptcp_options_received *mp_opt)
@@ -1211,6 +1236,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
         */
        if (mp_opt.use_ack)
                ack_update_msk(msk, sk, &mp_opt);
+       rwin_update(msk, sk, skb);
 
        /* Zero-data-length packets are dropped by the caller and not
         * propagated to the MPTCP layer, so the skb extension does not
@@ -1297,6 +1323,10 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
 
        if (rcv_wnd_new != rcv_wnd_old) {
 raise_win:
+               /* The msk-level rcv wnd is after the tcp level one,
+                * sync the latter.
+                */
+               rcv_wnd_new = rcv_wnd_old;
                win = rcv_wnd_old - ack_seq;
                tp->rcv_wnd = min_t(u64, win, U32_MAX);
                new_win = tp->rcv_wnd;
@@ -1320,6 +1350,7 @@ raise_win:
 
 update_wspace:
        WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
+       subflow->rcv_wnd_sent = rcv_wnd_new;
 }
 
 static void mptcp_track_rwin(struct tcp_sock *tp)
index 388d112cb0a7f308e77f042f0106ab4b8287bccf..0239bb19203e206e9a69c9c13a533ec687d0dc65 100644 (file)
@@ -508,6 +508,7 @@ struct mptcp_subflow_context {
        u64     remote_key;
        u64     idsn;
        u64     map_seq;
+       u64     rcv_wnd_sent;
        u32     snd_isn;
        u32     token;
        u32     rel_write_seq;