bpf, sockmap: Ensure SO_RCVBUF memory is observed on ingress redirect
author	John Fastabend <john.fastabend@gmail.com>
	Mon, 16 Nov 2020 22:28:06 +0000 (14:28 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Tue, 24 Nov 2020 12:39:03 +0000 (13:39 +0100)
[ Upstream commit 36cd0e696a832a00247fca522034703566ac8885 ]

Fix sockmap sk_skb programs so that they observe sk_rcvbuf limits. This
allows users to tune SO_RCVBUF and have sockmap honor it.
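
As an illustrative user-space sketch (not part of this patch; the helper
name, fd, and size below are hypothetical), the knob this fix makes
effective is the receive buffer of the socket that receives the redirect:

#include <sys/socket.h>

/* Hypothetical helper: cap receive memory on the socket receiving the
 * sockmap redirect. With this fix, the kernel stops queueing redirected
 * skbs once sk_rmem_alloc exceeds the (kernel-doubled) limit set here.
 */
static int tune_ingress_rcvbuf(int ingress_fd, int bytes)
{
	return setsockopt(ingress_fd, SOL_SOCKET, SO_RCVBUF,
			  &bytes, sizeof(bytes));
}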

We can refactor the if (charge) case out in later patches, but keep this
fix to the point.

Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path")
Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/160556568657.73229.8404601585878439060.stgit@john-XPS-13-9370
Signed-off-by: Sasha Levin <sashal@kernel.org>
net/core/skmsg.c
net/ipv4/tcp_bpf.c

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 30ddca6db6c6b199568a99e9233cecc9ed65f54a..f41b06e60ad9036117df17916b36238ac9648f2e 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
        struct scatterlist *sge = sk_msg_elem(msg, i);
        u32 len = sge->length;
 
-       if (charge)
-               sk_mem_uncharge(sk, len);
-       if (!msg->skb)
+       /* When the skb owns the memory we free it from the consume_skb path. */
+       if (!msg->skb) {
+               if (charge)
+                       sk_mem_uncharge(sk, len);
                put_page(sg_page(sge));
+       }
        memset(sge, 0, sizeof(*sge));
        return len;
 }
@@ -403,6 +405,9 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
        int copied = 0, num_sge;
        struct sk_msg *msg;
 
+       if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+               return -EAGAIN;
+
        msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
        if (unlikely(!msg))
                return -EAGAIN;
@@ -418,7 +423,14 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
                return num_sge;
        }
 
-       sk_mem_charge(sk, skb->len);
+       /* This will transition ownership of the data from the socket where
+        * the BPF program was run initiating the redirect to the socket
+        * we will eventually receive this data on. The data will be released
+        * from consume_skb() found in __tcp_bpf_recvmsg() after it's been copied
+        * into user buffers.
+        */
+       skb_set_owner_r(skb, sk);
+
        copied = skb->len;
        msg->sg.start = 0;
        msg->sg.size = copied;
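
For reference only (not part of this patch), skb_set_owner_r() in
include/net/sock.h looks roughly like the sketch below. Unlike the bare
sk_mem_charge() it replaces, it also adds skb->truesize to
sk->sk_rmem_alloc, which is exactly what the new sk_rcvbuf check above
observes, and it installs sock_rfree() as the skb destructor so the
accounting is dropped when the skb is consumed:

static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
	WARN_ON(skb->destructor);
	skb->sk = sk;
	skb->destructor = sock_rfree;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	sk_mem_charge(sk, skb->truesize);
}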
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index d85ba32dc6e7a6765fe524ffd5b012b0649e1c74..24e1be45d4cd7991d0d7b755cf546f59056151e1 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -45,7 +45,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
                        if (likely(!peek)) {
                                sge->offset += copy;
                                sge->length -= copy;
-                               sk_mem_uncharge(sk, copy);
+                               if (!msg_rx->skb)
+                                       sk_mem_uncharge(sk, copy);
                                msg_rx->sg.size -= copy;
 
                                if (!sge->length) {
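
The uncharge is skipped here for skb-backed messages because
skb_set_owner_r() above made the skb own that memory; it is returned when
the skb itself is freed, via its destructor, which looks roughly like the
following (again for reference, from net/core/sock.c, not part of this
patch):

void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}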