From: Paolo Abeni Date: Tue, 2 Jun 2026 12:14:10 +0000 (+1000) Subject: mptcp: close TOCTOU race while computing rcv_wnd X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8ab24fdebc369c0dfb90f82c1650b1e66662bb45;p=thirdparty%2Flinux.git mptcp: close TOCTOU race while computing rcv_wnd The MPTCP output path access locklessly the MPTCP-level ack_seq in multiple times, using possibly different values for the data_ack in the DSS option and to compute the announced rcv wnd for the same packet. Refactor the cote to avoid inconsistencies which may confuse the peer. Also ensure that the MPTCP level rcv wnd is updated only when the egress packet actually contains a DSS ack. Fixes: fa3fe2b15031 ("mptcp: track window announced to peer") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260602-net-mptcp-misc-fixes-7-1-rc7-v2-3-856831229976@kernel.org Signed-off-by: Jakub Kicinski --- diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8a1c5698983c..2d25f319f328 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -570,7 +570,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, struct mptcp_ext *mpext; unsigned int ack_size; bool ret = false; - u64 ack_seq; opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL; @@ -601,14 +600,11 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, return ret; } - ack_seq = READ_ONCE(msk->ack_seq); if (READ_ONCE(msk->use_64bit_ack)) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; - opts->ext_copy.data_ack = ack_seq; opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; - opts->ext_copy.data_ack32 = (uint32_t)ack_seq; opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; @@ -1297,19 +1293,14 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return true; } -static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) +static u64 mptcp_set_rwin(struct mptcp_sock *msk, struct tcp_sock *tp, + struct tcphdr *th, u64 ack_seq) { const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; - u64 ack_seq, rcv_wnd_old, rcv_wnd_new; - struct mptcp_sock *msk; + u64 rcv_wnd_old, rcv_wnd_new; u32 new_win; u64 win; - subflow = mptcp_subflow_ctx(ssk); - msk = mptcp_sk(subflow->conn); - - ack_seq = READ_ONCE(msk->ack_seq); rcv_wnd_new = ack_seq + tp->rcv_wnd; rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent); @@ -1362,7 +1353,7 @@ raise_win: update_wspace: WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); - subflow->rcv_wnd_sent = rcv_wnd_new; + return rcv_wnd_new; } static void mptcp_track_rwin(struct tcp_sock *tp) @@ -1474,13 +1465,25 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp, *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); if (mpext->use_ack) { + struct mptcp_sock *msk; + u64 ack_seq; + + /* DSS option is set only by mptcp_established_options, + * the caller is __tcp_transmit_skb() and ssk is always + * not NULL. + */ + subflow = mptcp_subflow_ctx(ssk); + msk = mptcp_sk(subflow->conn); + ack_seq = READ_ONCE(msk->ack_seq); if (mpext->ack64) { - put_unaligned_be64(mpext->data_ack, ptr); + put_unaligned_be64(ack_seq, ptr); ptr += 2; } else { - put_unaligned_be32(mpext->data_ack32, ptr); + put_unaligned_be32(ack_seq, ptr); ptr += 1; } + subflow->rcv_wnd_sent = mptcp_set_rwin(msk, tp, th, + ack_seq); } if (mpext->use_map) { @@ -1708,9 +1711,6 @@ mp_capable_done: i += 4; } } - - if (tp) - mptcp_set_rwin(tp, th); } __be32 mptcp_get_reset_option(const struct sk_buff *skb)