tcp: fix sk_rcvbuf overshoot
author     Eric Dumazet <edumazet@google.com>
           Tue, 13 May 2025 19:39:10 +0000 (19:39 +0000)
committer  Jakub Kicinski <kuba@kernel.org>
           Thu, 15 May 2025 18:30:08 +0000 (11:30 -0700)
Current autosizing in tcp_rcv_space_adjust() is too aggressive.

Instead of betting on possible losses and overestimating the BDP,
it is better to account only for slow start.

The following patch then adds more precise tuning for the event
of packet losses.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250513193919.1089692-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
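
To make the overshoot concrete before reading the diff: with the old formula
(removed below), a flow in slow start, where copied doubles every RTT so that
copied == 2 * rcvq_space.space, asks for roughly six times the bytes received
in the last RTT, while the new rule asks for exactly twice. A back-of-the-
envelope model of both formulas, using hypothetical numbers (1 MB previous
measurement, advmss of 1460); the arithmetic itself is taken straight from
the two hunks below:

#include <stdio.h>

int main(void)
{
	unsigned long long space = 1 << 20;      /* previous rcvq_space.space */
	unsigned long long copied = space << 1;  /* slow start: doubled       */
	unsigned long long advmss = 1460;
	unsigned long long old_win, grow, new_win;

	/* old rule (removed in this patch) */
	old_win = (copied << 1) + 16 * advmss;
	grow = old_win * (copied - space) / space;
	old_win += grow << 1;

	/* new rule: tcp_rcvbuf_grow() runs after rcvq_space.space = copied */
	new_win = copied << 1;

	printf("old rcvwin=%llu (~%llux copied), new rcvwin=%llu (2x copied)\n",
	       old_win, old_win / copied, new_win);
	return 0;
}

Compiled standalone, this prints old rcvwin=12652992 (~6x copied) versus
new rcvwin=4194304 (2x copied): the old code could request a window about
three times larger than the new slow-start-only rule.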
net/ipv4/tcp_input.c

index 88beb6d0f7b5981e65937a6727a1111fd341335b..89e886bb0fa11666ca4b51b032d536f233078dca 100644
@@ -747,6 +747,29 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
        }
 }
 
+static void tcp_rcvbuf_grow(struct sock *sk)
+{
+       const struct net *net = sock_net(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+       int rcvwin, rcvbuf, cap;
+
+       if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) ||
+           (sk->sk_userlocks & SOCK_RCVBUF_LOCK))
+               return;
+
+       /* slow start: allow the sender to double its rate. */
+       rcvwin = tp->rcvq_space.space << 1;
+
+       cap = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+
+       rcvbuf = min_t(u32, tcp_space_from_win(sk, rcvwin), cap);
+       if (rcvbuf > sk->sk_rcvbuf) {
+               WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+               /* Make the window clamp follow along.  */
+               WRITE_ONCE(tp->window_clamp,
+                          tcp_win_from_space(sk, rcvbuf));
+       }
+}
 /*
  * This function should be called every time data is copied to user space.
  * It calculates the appropriate TCP receive buffer space.
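
For readers tracing the arithmetic in tcp_rcvbuf_grow() above:
tcp_space_from_win() converts a desired receive window into the socket
buffer size needed to back it (skb and metadata overhead mean the buffer
must be larger than the advertised window), and tcp_win_from_space() is its
inverse, which is why the window clamp can simply follow the new buffer
size. A minimal userspace sketch of that conversion, assuming a hypothetical
fixed overhead ratio of one half; the real kernel derives a per-socket
scaling_ratio dynamically, so the constants here are illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins: assume half of each buffer byte is usable
 * window. The kernel learns the real per-socket ratio at runtime. */
#define RATIO 128                          /* out of 256, i.e. 1/2   */
#define SHIFT 8                            /* fixed-point shift      */

static uint64_t space_from_win(uint64_t win)
{
	return (win << SHIFT) / RATIO;     /* window -> buffer bytes */
}

static uint64_t win_from_space(uint64_t space)
{
	return (space * RATIO) >> SHIFT;   /* buffer bytes -> window */
}

int main(void)
{
	uint64_t space = 1 << 20;          /* last rcvq_space.space    */
	uint64_t cap = 6 << 20;            /* stand-in for tcp_rmem[2] */
	uint64_t rcvwin = space << 1;      /* let the sender double    */
	uint64_t rcvbuf = space_from_win(rcvwin);

	if (rcvbuf > cap)
		rcvbuf = cap;
	printf("rcvwin=%llu rcvbuf=%llu clamp=%llu\n",
	       (unsigned long long)rcvwin, (unsigned long long)rcvbuf,
	       (unsigned long long)win_from_space(rcvbuf));
	return 0;
}

With these numbers the round trip is exact: a 2 MB window needs a 4 MB
buffer, and clamping the window to win_from_space(rcvbuf) lands back on
2 MB.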
@@ -771,42 +794,10 @@ void tcp_rcv_space_adjust(struct sock *sk)
 
        trace_tcp_rcvbuf_grow(sk, time);
 
-       /* A bit of theory :
-        * copied = bytes received in previous RTT, our base window
-        * To cope with packet losses, we need a 2x factor
-        * To cope with slow start, and sender growing its cwin by 100 %
-        * every RTT, we need a 4x factor, because the ACK we are sending
-        * now is for the next RTT, not the current one :
-        * <prev RTT . ><current RTT .. ><next RTT .... >
-        */
-
-       if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
-           !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-               u64 rcvwin, grow;
-               int rcvbuf;
-
-               /* minimal window to cope with packet losses, assuming
-                * steady state. Add some cushion because of small variations.
-                */
-               rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
-
-               /* Accommodate for sender rate increase (eg. slow start) */
-               grow = rcvwin * (copied - tp->rcvq_space.space);
-               do_div(grow, tp->rcvq_space.space);
-               rcvwin += (grow << 1);
-
-               rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
-                              READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
-               if (rcvbuf > sk->sk_rcvbuf) {
-                       WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
-
-                       /* Make the window clamp follow along.  */
-                       WRITE_ONCE(tp->window_clamp,
-                                  tcp_win_from_space(sk, rcvbuf));
-               }
-       }
        tp->rcvq_space.space = copied;
 
+       tcp_rcvbuf_grow(sk);
+
 new_measure:
        tp->rcvq_space.seq = tp->copied_seq;
        tp->rcvq_space.time = tp->tcp_mstamp;
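
Note the ordering at the call site: tp->rcvq_space.space is updated to the
fresh measurement before tcp_rcvbuf_grow() runs, so the doubling always
applies to the bytes actually received in the last RTT. A toy model of the
resulting per-RTT loop, with hypothetical numbers and ignoring the
window-to-buffer conversion sketched earlier:

#include <stdio.h>

/* Toy per-RTT loop after this patch: measure, store, then size the
 * buffer to twice the stored window. Illustration only. */
int main(void)
{
	unsigned long long space = 64 * 1024;    /* rcvq_space.space     */
	unsigned long long rcvbuf = 128 * 1024;  /* sk_rcvbuf            */
	unsigned long long cap = 6 << 20;        /* tcp_rmem[2] stand-in */
	unsigned long long copied, want;
	int rtt;

	for (rtt = 1; rtt <= 6; rtt++) {
		copied = space << 1;   /* sender doubles in slow start  */
		space = copied;        /* tp->rcvq_space.space = copied */
		want = space << 1;     /* tcp_rcvbuf_grow(): 2x window  */
		if (want > cap)
			want = cap;
		if (want > rcvbuf)     /* grow only, never shrink       */
			rcvbuf = want;
		printf("RTT %d: copied=%llu rcvbuf=%llu\n",
		       rtt, copied, rcvbuf);
	}
	return 0;
}

The buffer tracks twice the last measurement each round trip until it hits
the tcp_rmem[2] cap (at RTT 6 with these numbers), instead of jumping ahead
by the old 2x-loss plus growth cushion.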