bpf: udp: Avoid socket skips and repeats during iteration
author    Jordan Rife <jordan@jrife.io>
          Fri, 2 May 2025 16:15:24 +0000 (09:15 -0700)
committer Martin KaFai Lau <martin.lau@kernel.org>
          Fri, 2 May 2025 19:07:46 +0000 (12:07 -0700)
Replace the offset-based approach for tracking progress through a bucket
in the UDP table with one based on socket cookies. Remember the cookies
of unprocessed sockets from the last batch and use this list to pick up
where we left off or, if the next socket has disappeared between reads,
to resume from the first socket after that point that still exists in
the bucket.

This approach guarantees that all sockets that existed when iteration
began and continue to exist throughout will be visited exactly once.
Sockets added to the table during iteration may or may not be seen, but
any that are seen will be seen exactly once.
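
To make the new rule concrete, here is a small standalone userspace
sketch of the resume logic (illustrative only: struct node and resume()
are hypothetical names, and a plain singly linked list stands in for
the bucket's portaddr chain):

        #include <stdint.h>
        #include <stdio.h>

        struct node {
                uint64_t cookie;        /* stands in for sk->sk_cookie */
                struct node *next;      /* stands in for the chain link */
        };

        /* Try each remembered cookie in iteration order; the first one
         * still present in the bucket is the resume point. NULL means
         * every remembered socket is gone.
         */
        static struct node *resume(struct node *first,
                                   const uint64_t *cookies, int n_cookies)
        {
                for (int i = 0; i < n_cookies; i++)
                        for (struct node *sk = first; sk; sk = sk->next)
                                if (sk->cookie == cookies[i])
                                        return sk;
                return NULL;
        }

        int main(void)
        {
                /* Bucket after the socket with cookie 2 was removed
                 * between reads: 1 -> 3 -> 4.
                 */
                struct node n4 = { 4, NULL }, n3 = { 3, &n4 },
                            n1 = { 1, &n3 };
                const uint64_t unseen[] = { 2, 3, 4 };
                struct node *sk = resume(&n1, unseen, 3);

                /* Prints "resume at cookie 3": cookie 2 is gone, so
                 * iteration resumes at the next remembered socket.
                 */
                printf("resume at cookie %llu\n",
                       sk ? (unsigned long long)sk->cookie : 0ULL);
                return 0;
        }

Trying the cookies in iteration order is what preserves the guarantee
above: a still-live socket from the old batch is never skipped, and
already-visited sockets are never revisited.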

Signed-off-by: Jordan Rife <jordan@jrife.io>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
net/ipv4/udp.c

index fe1438b2bcbafa0f89bf8f96200e587f9979b80b..358b49caa7b974ddef70da8482f8a35fdb003fa9 100644
@@ -93,6 +93,7 @@
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/slab.h>
+#include <linux/sock_diag.h>
 #include <net/tcp_states.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
@@ -3415,6 +3416,7 @@ struct bpf_iter__udp {
 
 union bpf_udp_iter_batch_item {
        struct sock *sk;
+       __u64 cookie;
 };
 
 struct bpf_udp_iter_state {
@@ -3422,26 +3424,42 @@ struct bpf_udp_iter_state {
        unsigned int cur_sk;
        unsigned int end_sk;
        unsigned int max_sk;
-       int offset;
        union bpf_udp_iter_batch_item *batch;
 };
 
 static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
                                      unsigned int new_batch_sz, gfp_t flags);
+static struct sock *bpf_iter_udp_resume(struct sock *first_sk,
+                                       union bpf_udp_iter_batch_item *cookies,
+                                       int n_cookies)
+{
+       struct sock *sk = NULL;
+       int i;
+
+       for (i = 0; i < n_cookies; i++) {
+               sk = first_sk;
+               udp_portaddr_for_each_entry_from(sk)
+                       if (cookies[i].cookie == atomic64_read(&sk->sk_cookie))
+                               goto done;
+       }
+done:
+       return sk;
+}
+
 static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
 {
        struct bpf_udp_iter_state *iter = seq->private;
        struct udp_iter_state *state = &iter->state;
+       unsigned int find_cookie, end_cookie;
        struct net *net = seq_file_net(seq);
-       int resume_bucket, resume_offset;
        struct udp_table *udptable;
        unsigned int batch_sks = 0;
+       int resume_bucket;
        int resizes = 0;
        struct sock *sk;
        int err = 0;
 
        resume_bucket = state->bucket;
-       resume_offset = iter->offset;
 
        /* The current batch is done, so advance the bucket. */
        if (iter->cur_sk == iter->end_sk)
                state->bucket++;
@@ -3457,6 +3475,8 @@ again:
         * before releasing the bucket lock. This allows BPF programs that are
         * called in seq_show to acquire the bucket lock if needed.
         */
+       find_cookie = iter->cur_sk;
+       end_cookie = iter->end_sk;
        iter->cur_sk = 0;
        iter->end_sk = 0;
        batch_sks = 0;
@@ -3467,21 +3487,21 @@ again:
                if (hlist_empty(&hslot2->head))
                        goto next_bucket;
 
-               iter->offset = 0;
                spin_lock_bh(&hslot2->lock);
                sk = hlist_entry_safe(hslot2->head.first, struct sock,
                                      __sk_common.skc_portaddr_node);
+               /* Resume from the first (in iteration order) unseen socket from
+                * the last batch that still exists in resume_bucket. Most of
+                * the time this will just be where the last iteration left off
+                * in resume_bucket unless that socket disappeared between
+                * reads.
+                */
+               if (state->bucket == resume_bucket)
+                       sk = bpf_iter_udp_resume(sk, &iter->batch[find_cookie],
+                                                end_cookie - find_cookie);
 fill_batch:
                udp_portaddr_for_each_entry_from(sk) {
                        if (seq_sk_match(seq, sk)) {
-                               /* Resume from the last iterated socket at the
-                                * offset in the bucket before iterator was stopped.
-                                */
-                               if (state->bucket == resume_bucket &&
-                                   iter->offset < resume_offset) {
-                                       ++iter->offset;
-                                       continue;
-                               }
                                if (iter->end_sk < iter->max_sk) {
                                        sock_hold(sk);
                                        iter->batch[iter->end_sk++].sk = sk;
@@ -3548,10 +3568,8 @@ static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        /* Whenever seq_next() is called, the iter->cur_sk is
         * done with seq_show(), so unref the iter->cur_sk.
         */
-       if (iter->cur_sk < iter->end_sk) {
+       if (iter->cur_sk < iter->end_sk)
                sock_put(iter->batch[iter->cur_sk++].sk);
-               ++iter->offset;
-       }
 
        /* After updating iter->cur_sk, check if there are more sockets
         * available in the current bucket batch.
@@ -3621,10 +3639,19 @@ unlock:
 
 static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
 {
+       union bpf_udp_iter_batch_item *item;
        unsigned int cur_sk = iter->cur_sk;
+       __u64 cookie;
 
-       while (cur_sk < iter->end_sk)
-               sock_put(iter->batch[cur_sk++].sk);
+       /* Remember the cookies of the sockets we haven't seen yet, so we can
+        * pick up where we left off next time around.
+        */
+       while (cur_sk < iter->end_sk) {
+               item = &iter->batch[cur_sk++];
+               cookie = sock_gen_cookie(item->sk);
+               sock_put(item->sk);
+               item->cookie = cookie;
+       }
 }
 
 static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
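
For context, a typical consumer of this iterator is a BPF program like
the minimal sketch below (modeled on the kernel's BPF selftests, not
part of this commit; it assumes a vmlinux.h/libbpf build environment,
and dump_udp is a hypothetical name):

        #include "vmlinux.h"
        #include <bpf/bpf_helpers.h>
        #include <bpf/bpf_tracing.h>

        char _license[] SEC("license") = "GPL";

        SEC("iter/udp")
        int dump_udp(struct bpf_iter__udp *ctx)
        {
                struct sock *sk = (struct sock *)ctx->udp_sk;

                if (!sk)
                        return 0;

                /* bpf_get_socket_cookie() returns the same stable
                 * socket cookie that the iterator now keys its resume
                 * position on.
                 */
                BPF_SEQ_PRINTF(ctx->meta->seq, "cookie=%llu\n",
                               bpf_get_socket_cookie(sk));
                return 0;
        }

Each read() on the pinned iterator file drains one batch; with this
change, a socket that stays alive across reads shows up in the output
exactly once, even if other sockets in its bucket come and go in
between.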