git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
tcp: add tcp_rto_max_ms sysctl
author Eric Dumazet <edumazet@google.com>
Fri, 7 Feb 2025 15:28:30 +0000 (15:28 +0000)
committer Paolo Abeni <pabeni@redhat.com>
Tue, 11 Feb 2025 12:08:00 +0000 (13:08 +0100)
Previous patch added a TCP_RTO_MAX_MS socket option
to tune a TCP socket max RTO value.

Many setups prefer to change a per netns sysctl.

This patch adds /proc/sys/net/ipv4/tcp_rto_max_ms

Its initial value is 120000 (120 seconds).

Keep in mind that a decrease of tcp_rto_max_ms
means shorter overall timeouts, unless tcp_retries2
sysctl is increased.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Documentation/networking/ip-sysctl.rst
Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
include/net/netns/ipv4.h
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_ipv4.c

index 363b4950d542aa32fbf6ab1617de46a900061f82..054561f8dcae77d4183f5b7e45f671ba8979390a 100644 (file)
@@ -705,6 +705,8 @@ tcp_retries2 - INTEGER
        seconds and is a lower bound for the effective timeout.
        TCP will effectively time out at the first RTO which exceeds the
        hypothetical timeout.
+       If tcp_rto_max_ms is decreased, it is recommended to also
+       change tcp_retries2.
 
        RFC 1122 recommends at least 100 seconds for the timeout,
        which corresponds to a value of at least 8.
@@ -1237,6 +1239,17 @@ tcp_rto_min_us - INTEGER
 
        Default: 200000
 
+tcp_rto_max_ms - INTEGER
+       Maximal TCP retransmission timeout (in ms).
+       Note that TCP_RTO_MAX_MS socket option has higher precedence.
+
+       When changing tcp_rto_max_ms, it is important to understand
+       that tcp_retries2 might need a change.
+
+       Possible Values: 1,000 - 120,000
+
+       Default: 120,000
+
 UDP variables
 =============
 
index de0263302f16dd815593671c4f75a93ed6f7cac4..6e7b20afd2d4984233e91d713ee9acd4b2e007f2 100644 (file)
@@ -86,6 +86,7 @@ u8                              sysctl_tcp_sack
 u8                              sysctl_tcp_window_scaling                                                            tcp_syn_options,tcp_parse_options
 u8                              sysctl_tcp_timestamps
 u8                              sysctl_tcp_early_retrans                     read_mostly                             tcp_schedule_loss_probe(tcp_write_xmit)
+int                             sysctl_tcp_rto_max_ms
 u8                              sysctl_tcp_recovery                                                                  tcp_fastretrans_alert
 u8                              sysctl_tcp_thin_linear_timeouts                                                      tcp_retrans_timer(on_thin_streams)
 u8                              sysctl_tcp_slow_start_after_idle                                                     unlikely(tcp_cwnd_validate-network-not-starved)
index 46452da352061007d19d00fdacddd25bbe56444d..45ac125e8aebb99d4083d540c907f0d560dac0b0 100644 (file)
@@ -181,6 +181,7 @@ struct netns_ipv4 {
        u8 sysctl_tcp_window_scaling;
        u8 sysctl_tcp_timestamps;
        int sysctl_tcp_rto_min_us;
+       int sysctl_tcp_rto_max_ms;
        u8 sysctl_tcp_recovery;
        u8 sysctl_tcp_thin_linear_timeouts;
        u8 sysctl_tcp_slow_start_after_idle;
index 42cb5dc9cb245c26f9a38f8c8c4b26b1adddca39..3a43010d726fb103beaad2b11d6797424b0c946e 100644 (file)
@@ -28,6 +28,7 @@ static int tcp_adv_win_scale_max = 31;
 static int tcp_app_win_max = 31;
 static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
 static int tcp_min_snd_mss_max = 65535;
+static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC;
 static int ip_privileged_port_min;
 static int ip_privileged_port_max = 65535;
 static int ip_ttl_min = 1;
@@ -1583,6 +1584,15 @@ static struct ctl_table ipv4_net_table[] = {
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ONE,
        },
+       {
+               .procname       = "tcp_rto_max_ms",
+               .data           = &init_net.ipv4.sysctl_tcp_rto_max_ms,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ONE_THOUSAND,
+               .extra2         = &tcp_rto_max_max,
+       },
 };
 
 static __net_init int ipv4_sysctl_init_net(struct net *net)
index 3bb8fbbb01d98c1b0d0bb19e8b2d049d06704e46..992d5c9b2487ce7a689253c068ad7b826f6b6fb0 100644 (file)
@@ -423,7 +423,7 @@ void tcp_init_sock(struct sock *sk)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
-       int rto_min_us;
+       int rto_min_us, rto_max_ms;
 
        tp->out_of_order_queue = RB_ROOT;
        sk->tcp_rtx_queue = RB_ROOT;
@@ -433,8 +433,8 @@ void tcp_init_sock(struct sock *sk)
 
        icsk->icsk_rto = TCP_TIMEOUT_INIT;
 
-       /* Use a sysctl ? */
-       icsk->icsk_rto_max = TCP_RTO_MAX;
+       rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms);
+       icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms);
 
        rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us);
        icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us);
index 06fb0123d2d60e22f19ea48b73ac3668c51465a2..d1fd2128ac6cce9b845b1f8d278a194c511db87b 100644 (file)
@@ -3532,6 +3532,7 @@ static int __net_init tcp_sk_init(struct net *net)
 
        net->ipv4.sysctl_tcp_pingpong_thresh = 1;
        net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN);
+       net->ipv4.sysctl_tcp_rto_max_ms = TCP_RTO_MAX_SEC * MSEC_PER_SEC;
 
        return 0;
 }