]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
tcp_bpf: Add sk_rmem_alloc related logic for tcp_bpf ingress redirection
authorZijian Zhang <zijianzhang@bytedance.com>
Tue, 10 Dec 2024 01:20:39 +0000 (01:20 +0000)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 9 Jan 2025 12:28:37 +0000 (13:28 +0100)
[ Upstream commit d888b7af7c149c115dd6ac772cc11c375da3e17c ]

When we do sk_psock_verdict_apply->sk_psock_skb_ingress, an sk_msg will
be created out of the skb, and the rmem accounting of the sk_msg will be
handled by the skb.

For skmsgs in __SK_REDIRECT case of tcp_bpf_send_verdict, when redirecting
to the ingress of a socket, although we sk_rmem_schedule and add sk_msg to
the ingress_msg of sk_redir, we do not update sk_rmem_alloc. As a result,
except for the global memory limit, the rmem of sk_redir is nearly
unlimited. Thus, add sk_rmem_alloc related logic to limit the recv buffer.

Since the functions sk_msg_recvmsg and __sk_psock_purge_ingress_msg are
used in both of these paths, we use "msg->skb" to test whether the sk_msg
is skb-backed. If it is not, we shall do the memory accounting explicitly.

Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20241210012039.1669389-3-zijianzhang@bytedance.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
include/linux/skmsg.h
net/core/skmsg.c
net/ipv4/tcp_bpf.c

index 422b391d931fec465033bae088420ec4c9966d38..5d3e4d4d9438e200b943268272441a9b03ca2ca3 100644 (file)
@@ -313,17 +313,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
        kfree_skb(skb);
 }
 
-static inline void sk_psock_queue_msg(struct sk_psock *psock,
+static inline bool sk_psock_queue_msg(struct sk_psock *psock,
                                      struct sk_msg *msg)
 {
+       bool ret;
+
        spin_lock_bh(&psock->ingress_lock);
-       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
                list_add_tail(&msg->list, &psock->ingress_msg);
-       else {
+               ret = true;
+       } else {
                sk_msg_free(psock->sk, msg);
                kfree(msg);
+               ret = false;
        }
        spin_unlock_bh(&psock->ingress_lock);
+       return ret;
 }
 
 static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
index f8563d4da0b100dad47fc397e55d70a64a8c6eb3..a5947aa5598375e85e79f70e7fca086fadd96632 100644 (file)
@@ -445,8 +445,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
                        if (likely(!peek)) {
                                sge->offset += copy;
                                sge->length -= copy;
-                               if (!msg_rx->skb)
+                               if (!msg_rx->skb) {
                                        sk_mem_uncharge(sk, copy);
+                                       atomic_sub(copy, &sk->sk_rmem_alloc);
+                               }
                                msg_rx->sg.size -= copy;
 
                                if (!sge->length) {
@@ -761,6 +763,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 
        list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
                list_del(&msg->list);
+               if (!msg->skb)
+                       atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc);
                sk_msg_free(psock->sk, msg);
                kfree(msg);
        }
index dc1291827381c9ffe7ee27cc71d7a4c376ac55e2..9e24542251b1ced0a8296c01ef31e8872ffcf99b 100644 (file)
@@ -38,6 +38,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
                }
 
                sk_mem_charge(sk, size);
+               atomic_add(size, &sk->sk_rmem_alloc);
                sk_msg_xfer(tmp, msg, i, size);
                copied += size;
                if (sge->length)
@@ -56,7 +57,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
 
        if (!ret) {
                msg->sg.start = i;
-               sk_psock_queue_msg(psock, tmp);
+               if (!sk_psock_queue_msg(psock, tmp))
+                       atomic_sub(copied, &sk->sk_rmem_alloc);
                sk_psock_data_ready(sk, psock);
        } else {
                sk_msg_free(sk, tmp);