git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
tcp: avoid atomic operations on sk->sk_rmem_alloc
author Eric Dumazet <edumazet@google.com>
Thu, 20 Mar 2025 12:16:04 +0000 (12:16 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 25 Mar 2025 14:37:16 +0000 (07:37 -0700)
TCP uses generic skb_set_owner_r() and sock_rfree()
for received packets, with socket lock being owned.

Switch to private versions, avoiding two atomic operations
per packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250320121604.3342831-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/tcp.h
net/ipv4/tcp.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_input.c

index 4450c384ef178e860bd76c23653e9ce9d7a7289b..df04dc09c519d850579e22a17f49eeec7d22c607 100644 (file)
@@ -779,6 +779,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 
 /* tcp.c */
 void tcp_get_info(struct sock *, struct tcp_info *);
+void tcp_sock_rfree(struct sk_buff *skb);
 
 /* Read 'sendfile()'-style from a TCP socket */
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
@@ -2898,4 +2899,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
                const void *saddr, const void *daddr,
                int family, int dif, int sdif);
 
+/* version of skb_set_owner_r() avoiding one atomic_add() */
+static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+       skb_orphan(skb);
+       skb->sk = sk;
+       skb->destructor = tcp_sock_rfree;
+
+       sock_owned_by_me(sk);
+       atomic_set(&sk->sk_rmem_alloc,
+                  atomic_read(&sk->sk_rmem_alloc) + skb->truesize);
+
+       sk_forward_alloc_add(sk, -skb->truesize);
+}
+
 #endif /* _TCP_H */
index 6edc441b37023de48281aa810aa7a36199fd8bc3..ea8de00f669d059d97766529e3b8c53d5040456d 100644 (file)
@@ -1525,11 +1525,25 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
        __tcp_cleanup_rbuf(sk, copied);
 }
 
+/* private version of sock_rfree() avoiding one atomic_sub() */
+void tcp_sock_rfree(struct sk_buff *skb)
+{
+       struct sock *sk = skb->sk;
+       unsigned int len = skb->truesize;
+
+       sock_owned_by_me(sk);
+       atomic_set(&sk->sk_rmem_alloc,
+                  atomic_read(&sk->sk_rmem_alloc) - len);
+
+       sk_forward_alloc_add(sk, len);
+       sk_mem_reclaim(sk);
+}
+
 static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
 {
        __skb_unlink(skb, &sk->sk_receive_queue);
-       if (likely(skb->destructor == sock_rfree)) {
-               sock_rfree(skb);
+       if (likely(skb->destructor == tcp_sock_rfree)) {
+               tcp_sock_rfree(skb);
                skb->destructor = NULL;
                skb->sk = NULL;
                return skb_attempt_defer_free(skb);
index 1a6b1bc5424514e27a99cbb2fcedf001afd51d98..ca40665145c692ce0de518886bb366406606f7ac 100644 (file)
@@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
        tcp_segs_in(tp, skb);
        __skb_pull(skb, tcp_hdrlen(skb));
        sk_forced_mem_schedule(sk, skb->truesize);
-       skb_set_owner_r(skb, sk);
+       tcp_skb_set_owner_r(skb, sk);
 
        TCP_SKB_CB(skb)->seq++;
        TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
index a35018e2d0ba27b14d0b59d3728f7181b1a51161..e1f952fbac48dfdc4f4f75a50a85b4904b93bbe2 100644 (file)
@@ -5171,7 +5171,7 @@ end:
                if (tcp_is_sack(tp))
                        tcp_grow_window(sk, skb, false);
                skb_condense(skb);
-               skb_set_owner_r(skb, sk);
+               tcp_skb_set_owner_r(skb, sk);
        }
 }
 
@@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
        tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
        if (!eaten) {
                tcp_add_receive_queue(sk, skb);
-               skb_set_owner_r(skb, sk);
+               tcp_skb_set_owner_r(skb, sk);
        }
        return eaten;
 }
@@ -5504,7 +5504,7 @@ skip_this:
                        __skb_queue_before(list, skb, nskb);
                else
                        __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
-               skb_set_owner_r(nskb, sk);
+               tcp_skb_set_owner_r(nskb, sk);
                mptcp_skb_ext_move(nskb, skb);
 
                /* Copy data, releasing collapsed skbs. */