git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
tcp: introduce icsk->icsk_keepalive_timer
author Eric Dumazet <edumazet@google.com>
Mon, 24 Nov 2025 17:50:12 +0000 (17:50 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Wed, 26 Nov 2025 03:28:29 +0000 (19:28 -0800)
sk->sk_timer has been used for TCP keepalives.

Keepalive timers are not in the fast path; we want to use sk->sk_timer
storage for retransmit timers, for better cache locality.

Create icsk->icsk_keepalive_timer and change keepalive
code to no longer use sk->sk_timer.

Added space is reclaimed in the following patch.

This includes changes to MPTCP, which was also using sk_timer.

Alias icsk->mptcp_tout_timer and icsk->icsk_keepalive_timer (through an
anonymous union) for the sake of inet_sk_diag_fill().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251124175013.1473655-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Documentation/networking/net_cachelines/inet_connection_sock.rst
include/net/inet_connection_sock.h
net/ipv4/inet_connection_sock.c
net/ipv4/inet_diag.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_timer.c
net/ipv6/tcp_ipv6.c
net/mptcp/protocol.c
net/mptcp/protocol.h
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c

index 8fae85ebb773085b249c606ce37872e0566b70b4..4f65de2def8c9ccef1108f8f3a3de1d8c12b8497 100644 (file)
@@ -14,6 +14,7 @@ struct inet_bind_bucket             icsk_bind_hash         read_mostly
 struct inet_bind2_bucket            icsk_bind2_hash        read_mostly                             tcp_set_state,inet_put_port
 struct timer_list                   icsk_retransmit_timer  read_write                              inet_csk_reset_xmit_timer,tcp_connect
 struct timer_list                   icsk_delack_timer      read_mostly                             inet_csk_reset_xmit_timer,tcp_connect
+struct timer_list                   icsk_keepalive_timer
 u32                                 icsk_rto               read_write                              tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one
 u32                                 icsk_rto_min
 u32                                 icsk_rto_max           read_mostly                             tcp_reset_xmit_timer
index 765c2149d6787ef1063e5f29d78547ec6ca79746..e0d90b996348d895256191a5f10275d8f3f3a69a 100644 (file)
@@ -57,6 +57,9 @@ struct inet_connection_sock_af_ops {
  * @icsk_bind_hash:       Bind node
  * @icsk_bind2_hash:      Bind node in the bhash2 table
  * @icsk_retransmit_timer: Resend (no ack)
+ * @icsk_delack_timer:     Delayed ACK timer
+ * @icsk_keepalive_timer:  Keepalive timer
+ * @mptcp_tout_timer: mptcp timer
  * @icsk_rto:             Retransmit timeout
  * @icsk_pmtu_cookie      Last pmtu seen by socket
  * @icsk_ca_ops                   Pluggable congestion control hook
@@ -81,8 +84,12 @@ struct inet_connection_sock {
        struct request_sock_queue icsk_accept_queue;
        struct inet_bind_bucket   *icsk_bind_hash;
        struct inet_bind2_bucket  *icsk_bind2_hash;
-       struct timer_list         icsk_retransmit_timer;
-       struct timer_list         icsk_delack_timer;
+       struct timer_list         icsk_retransmit_timer;
+       struct timer_list         icsk_delack_timer;
+       union {
+               struct timer_list icsk_keepalive_timer;
+               struct timer_list mptcp_tout_timer;
+       };
        __u32                     icsk_rto;
        __u32                     icsk_rto_min;
        u32                       icsk_rto_max;
index b4eae731c9ba5693b38ee063decaa6fd776d9b8b..4fc09f9bf25d59e8155107eba391f5c566f290a0 100644 (file)
@@ -739,7 +739,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
 
        timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
        timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
-       timer_setup(&sk->sk_timer, keepalive_handler, 0);
+       timer_setup(&icsk->icsk_keepalive_timer, keepalive_handler, 0);
        icsk->icsk_pending = icsk->icsk_ack.pending = 0;
 }
 
@@ -752,7 +752,7 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
 
        sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
        sk_stop_timer(sk, &icsk->icsk_delack_timer);
-       sk_stop_timer(sk, &sk->sk_timer);
+       sk_stop_timer(sk, &icsk->icsk_keepalive_timer);
 }
 
 void inet_csk_clear_xmit_timers_sync(struct sock *sk)
@@ -767,7 +767,7 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk)
 
        sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
        sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
-       sk_stop_timer_sync(sk, &sk->sk_timer);
+       sk_stop_timer_sync(sk, &icsk->icsk_keepalive_timer);
 }
 
 struct dst_entry *inet_csk_route_req(const struct sock *sk,
index 9f63c09439a055550c49b659f23ff8a00ee80348..3f5b1418a6109bd4e398fb2a7d95013044e75f08 100644 (file)
@@ -293,11 +293,11 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
                r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out);
                r->idiag_expires =
                        jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies);
-       } else if (timer_pending(&sk->sk_timer)) {
+       } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
                r->idiag_timer = 2;
                r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out);
                r->idiag_expires =
-                       jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
+                       jiffies_delta_to_msecs(icsk->icsk_keepalive_timer.expires - jiffies);
        }
 
        if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
index 7b8af2c8d03a4cf2c0d90029d2725c0f9dc1a071..f8a9596e8f4d41563896f02329d20b731fe7961f 100644 (file)
@@ -2873,9 +2873,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
        } else if (icsk_pending == ICSK_TIME_PROBE0) {
                timer_active    = 4;
                timer_expires   = tcp_timeout_expires(sk);
-       } else if (timer_pending(&sk->sk_timer)) {
+       } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
                timer_active    = 2;
-               timer_expires   = sk->sk_timer.expires;
+               timer_expires   = icsk->icsk_keepalive_timer.expires;
        } else {
                timer_active    = 0;
                timer_expires = jiffies;
index afbd901e610e24c88439d5c152531074d514533a..d2678dfd811806840cb332d47750dd771b20d6af 100644 (file)
@@ -755,12 +755,12 @@ void tcp_syn_ack_timeout(const struct request_sock *req)
 
 void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len)
 {
-       sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+       sk_reset_timer(sk, &inet_csk(sk)->icsk_keepalive_timer, jiffies + len);
 }
 
 static void tcp_delete_keepalive_timer(struct sock *sk)
 {
-       sk_stop_timer(sk, &sk->sk_timer);
+       sk_stop_timer(sk, &inet_csk(sk)->icsk_keepalive_timer);
 }
 
 void tcp_set_keepalive(struct sock *sk, int val)
@@ -777,8 +777,9 @@ EXPORT_IPV6_MOD_GPL(tcp_set_keepalive);
 
 static void tcp_keepalive_timer(struct timer_list *t)
 {
-       struct sock *sk = timer_container_of(sk, t, sk_timer);
-       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_connection_sock *icsk =
+               timer_container_of(icsk, t, icsk_keepalive_timer);
+       struct sock *sk = &icsk->icsk_inet.sk;
        struct tcp_sock *tp = tcp_sk(sk);
        u32 elapsed;
 
index 33c76c3a6da7cb0a1a49344ffe9ae27f0e949388..280fe59785598e269183bf90f962ea8d58632b9a 100644 (file)
@@ -2167,9 +2167,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
        } else if (icsk_pending == ICSK_TIME_PROBE0) {
                timer_active    = 4;
                timer_expires   = tcp_timeout_expires(sp);
-       } else if (timer_pending(&sp->sk_timer)) {
+       } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
                timer_active    = 2;
-               timer_expires   = sp->sk_timer.expires;
+               timer_expires   = icsk->icsk_keepalive_timer.expires;
        } else {
                timer_active    = 0;
                timer_expires = jiffies;
index 4288b6a53b6e46617511c3db9b900cfde644295b..89a5f63921e6afc60e9c49ea49f2fd7c8331d377 100644 (file)
@@ -2374,7 +2374,9 @@ static void mptcp_retransmit_timer(struct timer_list *t)
 
 static void mptcp_tout_timer(struct timer_list *t)
 {
-       struct sock *sk = timer_container_of(sk, t, sk_timer);
+       struct inet_connection_sock *icsk =
+               timer_container_of(icsk, t, mptcp_tout_timer);
+       struct sock *sk = &icsk->icsk_inet.sk;
 
        mptcp_schedule_work(sk);
        sock_put(sk);
@@ -2828,7 +2830,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
         */
        timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
 
-       sk_reset_timer(sk, &sk->sk_timer, timeout);
+       sk_reset_timer(sk, &inet_csk(sk)->mptcp_tout_timer, timeout);
 }
 
 static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
@@ -2974,7 +2976,7 @@ static void __mptcp_init_sock(struct sock *sk)
 
        /* re-use the csk retrans timer for MPTCP-level retrans */
        timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
-       timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
+       timer_setup(&msk->sk.mptcp_tout_timer, mptcp_tout_timer, 0);
 }
 
 static void mptcp_ca_reset(struct sock *sk)
@@ -3176,7 +3178,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
        might_sleep();
 
        mptcp_stop_rtx_timer(sk);
-       sk_stop_timer(sk, &sk->sk_timer);
+       sk_stop_timer(sk, &inet_csk(sk)->mptcp_tout_timer);
        msk->pm.status = 0;
        mptcp_release_sched(msk);
 
index 313da78e2b752b691a5be14b9c636abf642a0144..9c0d17876b22f50503513a4aa36db5a5f9d61a68 100644 (file)
@@ -892,7 +892,7 @@ static inline void mptcp_stop_tout_timer(struct sock *sk)
        if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
                return;
 
-       sk_stop_timer(sk, &sk->sk_timer);
+       sk_stop_timer(sk, &inet_csk(sk)->mptcp_tout_timer);
        inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
 }
 
index 164640db3a29cf720e193453cab79f4bc317917c..685811326a04126f411da2199cbb5dba576cdde7 100644 (file)
@@ -103,9 +103,9 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                timer_active = 4;
                timer_expires = icsk->icsk_retransmit_timer.expires;
-       } else if (timer_pending(&sp->sk_timer)) {
+       } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
                timer_active = 2;
-               timer_expires = sp->sk_timer.expires;
+               timer_expires = icsk->icsk_keepalive_timer.expires;
        } else {
                timer_active = 0;
                timer_expires = bpf_jiffies64();
index 591c703f5032f024e4b511a6af8d63d1233a042a..0f4a927127517ce3d156c718c3ddece0407c3137 100644 (file)
@@ -103,9 +103,9 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
        } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
                timer_active = 4;
                timer_expires = icsk->icsk_retransmit_timer.expires;
-       } else if (timer_pending(&sp->sk_timer)) {
+       } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
                timer_active = 2;
-               timer_expires = sp->sk_timer.expires;
+               timer_expires = icsk->icsk_keepalive_timer.expires;
        } else {
                timer_active = 0;
                timer_expires = bpf_jiffies64();