git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net: avoid extra access to sk->sk_wmem_alloc in sock_wfree()
author: Eric Dumazet <edumazet@google.com>
Fri, 17 Oct 2025 13:37:12 +0000 (13:37 +0000)
committer: Paolo Abeni <pabeni@redhat.com>
Tue, 21 Oct 2025 13:56:21 +0000 (15:56 +0200)
UDP TX packets destructor is sock_wfree().

It suffers from a cache line bouncing in sock_def_write_space_wfree().

Instead of reading sk->sk_wmem_alloc after we just did an atomic RMW
on it, use __refcount_sub_and_test() to get the old value for free,
and pass the new value to sock_def_write_space_wfree().

Add __sock_writeable() helper.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251017133712.2842665-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
include/net/sock.h
net/core/sock.c

index 5c564f114ae9500c447139555e60291d3c9736e0..01ce231603db0cc4e82ae27271df7734112c038d 100644 (file)
@@ -2607,12 +2607,16 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
 
 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
 
+static inline bool __sock_writeable(const struct sock *sk, int wmem_alloc)
+{
+       return wmem_alloc < (READ_ONCE(sk->sk_sndbuf) >> 1);
+}
 /*
  *     Default write policy as shown to user space via poll/select/SIGIO
  */
 static inline bool sock_writeable(const struct sock *sk)
 {
-       return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
+       return __sock_writeable(sk, refcount_read(&sk->sk_wmem_alloc));
 }
 
 static inline gfp_t gfp_any(void)
index b78533fb926866c51a85bda3fc1bfafe29cd4f5a..a99132cc0965604a8d4e30e626837606afe316f4 100644 (file)
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
-static void sock_def_write_space_wfree(struct sock *sk);
+static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc);
 static void sock_def_write_space(struct sock *sk);
 
 /**
@@ -2659,16 +2659,18 @@ EXPORT_SYMBOL_GPL(sk_setup_caps);
  */
 void sock_wfree(struct sk_buff *skb)
 {
-       struct sock *sk = skb->sk;
        unsigned int len = skb->truesize;
+       struct sock *sk = skb->sk;
        bool free;
+       int old;
 
        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
                if (sock_flag(sk, SOCK_RCU_FREE) &&
                    sk->sk_write_space == sock_def_write_space) {
                        rcu_read_lock();
-                       free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
-                       sock_def_write_space_wfree(sk);
+                       free = __refcount_sub_and_test(len, &sk->sk_wmem_alloc,
+                                                      &old);
+                       sock_def_write_space_wfree(sk, old - len);
                        rcu_read_unlock();
                        if (unlikely(free))
                                __sk_free(sk);
@@ -3612,12 +3614,12 @@ static void sock_def_write_space(struct sock *sk)
  * for SOCK_RCU_FREE sockets under RCU read section and after putting
  * ->sk_wmem_alloc.
  */
-static void sock_def_write_space_wfree(struct sock *sk)
+static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc)
 {
        /* Do not wake up a writer until he can make "significant"
         * progress.  --DaveM
         */
-       if (sock_writeable(sk)) {
+       if (__sock_writeable(sk, wmem_alloc)) {
                struct socket_wq *wq = rcu_dereference(sk->sk_wq);
 
                /* rely on refcount_sub from sock_wfree() */