For TCP flows using ms RFC 7323 timestamp granularity
tcp_rcv_rtt_update() can be fed with 1 ms samples, breaking
TCP autotuning for data center flows with sub ms RTT.
Instead, rely on the window based samples, fed by tcp_rcv_rtt_measure()
tcp_rcvbuf_grow() for a 10 second TCP_STREAM sesssion now looks saner.
We can see rcvbuf is kept at a reasonable value.
222.234976: tcp:tcp_rcvbuf_grow: time=348 rtt_us=330 copied=110592 inq=0 space=40960 ooo=0 scaling_ratio=230 rcvbuf=131072 ...
222.235276: tcp:tcp_rcvbuf_grow: time=300 rtt_us=288 copied=126976 inq=0 space=110592 ooo=0 scaling_ratio=230 rcvbuf=246187 ...
222.235569: tcp:tcp_rcvbuf_grow: time=294 rtt_us=288 copied=184320 inq=0 space=126976 ooo=0 scaling_ratio=230 rcvbuf=282659 ...
222.235833: tcp:tcp_rcvbuf_grow: time=264 rtt_us=244 copied=373760 inq=0 space=184320 ooo=0 scaling_ratio=230 rcvbuf=410312 ...
222.236142: tcp:tcp_rcvbuf_grow: time=308 rtt_us=219 copied=424960 inq=20480 space=373760 ooo=0 scaling_ratio=230 rcvbuf=832022 ...
222.236378: tcp:tcp_rcvbuf_grow: time=236 rtt_us=219 copied=692224 inq=49152 space=404480 ooo=0 scaling_ratio=230 rcvbuf=900407 ...
222.236602: tcp:tcp_rcvbuf_grow: time=225 rtt_us=219 copied=730112 inq=49152 space=643072 ooo=0 scaling_ratio=230 rcvbuf=
1431534 ...
222.237050: tcp:tcp_rcvbuf_grow: time=229 rtt_us=219 copied=
1160192 inq=49152 space=680960 ooo=0 scaling_ratio=230 rcvbuf=
1515876 ...
222.237618: tcp:tcp_rcvbuf_grow: time=305 rtt_us=218 copied=
2228224 inq=49152 space=
1111040 ooo=0 scaling_ratio=230 rcvbuf=
2473271 ...
222.238591: tcp:tcp_rcvbuf_grow: time=224 rtt_us=218 copied=
3063808 inq=360448 space=
2179072 ooo=0 scaling_ratio=230 rcvbuf=
4850803 ...
222.240647: tcp:tcp_rcvbuf_grow: time=260 rtt_us=218 copied=
2752512 inq=0 space=
2703360 ooo=0 scaling_ratio=230 rcvbuf=
6017914 ...
222.243535: tcp:tcp_rcvbuf_grow: time=224 rtt_us=218 copied=
2834432 inq=49152 space=
2752512 ooo=0 scaling_ratio=230 rcvbuf=
6127331 ...
222.245108: tcp:tcp_rcvbuf_grow: time=240 rtt_us=218 copied=
2883584 inq=49152 space=
2785280 ooo=0 scaling_ratio=230 rcvbuf=
6200275 ...
222.245333: tcp:tcp_rcvbuf_grow: time=224 rtt_us=218 copied=
2859008 inq=0 space=
2834432 ooo=0 scaling_ratio=230 rcvbuf=
6309692 ...
222.301021: tcp:tcp_rcvbuf_grow: time=222 rtt_us=218 copied=
2883584 inq=0 space=
2859008 ooo=0 scaling_ratio=230 rcvbuf=
6364400 ...
222.989242: tcp:tcp_rcvbuf_grow: time=225 rtt_us=218 copied=
2899968 inq=0 space=
2883584 ooo=0 scaling_ratio=230 rcvbuf=
6419108 ...
224.139553: tcp:tcp_rcvbuf_grow: time=224 rtt_us=218 copied=
3014656 inq=65536 space=
2899968 ooo=0 scaling_ratio=230 rcvbuf=
6455580 ...
224.584608: tcp:tcp_rcvbuf_grow: time=232 rtt_us=218 copied=
3014656 inq=49152 space=
2949120 ooo=0 scaling_ratio=230 rcvbuf=
6564997 ...
230.145560: tcp:tcp_rcvbuf_grow: time=223 rtt_us=218 copied=
2981888 inq=0 space=
2965504 ooo=0 scaling_ratio=230 rcvbuf=
6601469 ...
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Wei Wang <weiwan@google.com>
Link: https://patch.msgid.link/20250513193919.1089692-6-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
-static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
+static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp, u32 min_delta)
{
u32 delta, delta_us;
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
if (!delta)
- delta = 1;
+ delta = min_delta;
delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
return delta_us;
}
if (TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
- s32 delta = tcp_rtt_tsopt_us(tp);
+ s32 delta = tcp_rtt_tsopt_us(tp, 0);
- if (delta >= 0)
+ if (delta > 0)
tcp_rcv_rtt_update(tp, delta, 0);
}
}
*/
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
- seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
+ seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp, 1);
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)