git.ipfire.org Git - thirdparty/linux.git/commitdiff
bpf: add new BPF_CGROUP_ITER_CHILDREN control option
author Matt Bobrowski <mattbobrowski@google.com>
Tue, 27 Jan 2026 08:51:10 +0000 (08:51 +0000)
committer Alexei Starovoitov <ast@kernel.org>
Tue, 27 Jan 2026 17:05:54 +0000 (09:05 -0800)
Currently, the BPF cgroup iterator supports walking descendants in
either pre-order (BPF_CGROUP_ITER_DESCENDANTS_PRE) or post-order
(BPF_CGROUP_ITER_DESCENDANTS_POST). These modes perform an exhaustive
depth-first search (DFS) of the hierarchy. In scenarios where a BPF
program may need to inspect only the direct children of a given parent
cgroup, a full DFS is unnecessarily expensive.

This patch introduces a new BPF cgroup iterator control option,
BPF_CGROUP_ITER_CHILDREN. This control option restricts the traversal
to the immediate children of a specified parent cgroup, allowing for
more targeted and efficient iteration, particularly when exhaustive
depth-first search (DFS) traversal is not required.

Signed-off-by: Matt Bobrowski <mattbobrowski@google.com>
Link: https://lore.kernel.org/r/20260127085112.3608687-1-mattbobrowski@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/uapi/linux/bpf.h
kernel/bpf/cgroup_iter.c
tools/include/uapi/linux/bpf.h

index 44e7dbc278e37179f50a57afbaf30e776fe8644b..c8d400b7680a81236f1b352b38fef4bdc099cfb5 100644 (file)
@@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order {
        BPF_CGROUP_ITER_DESCENDANTS_PRE,        /* walk descendants in pre-order. */
        BPF_CGROUP_ITER_DESCENDANTS_POST,       /* walk descendants in post-order. */
        BPF_CGROUP_ITER_ANCESTORS_UP,           /* walk ancestors upward. */
+       /*
+        * Walks the immediate children of the specified parent
+        * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
+        * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP
+        * the iterator does not include the specified parent as one of the
+        * returned iterator elements.
+        */
+       BPF_CGROUP_ITER_CHILDREN,
 };
 
 union bpf_iter_link_info {
index f04a468cf6a72688121ced9e4ddcf0555b7758c5..fd51fe3d92cca4368ad19f49c90680f164f44cb4 100644 (file)
@@ -8,12 +8,13 @@
 
 #include "../cgroup/cgroup-internal.h"  /* cgroup_mutex and cgroup_is_dead */
 
-/* cgroup_iter provides four modes of traversal to the cgroup hierarchy.
+/* cgroup_iter provides five modes of traversal to the cgroup hierarchy.
  *
  *  1. Walk the descendants of a cgroup in pre-order.
  *  2. Walk the descendants of a cgroup in post-order.
  *  3. Walk the ancestors of a cgroup.
  *  4. Show the given cgroup only.
+ *  5. Walk the children of a given parent cgroup.
  *
  * For walking descendants, cgroup_iter can walk in either pre-order or
  * post-order. For walking ancestors, the iter walks up from a cgroup to
@@ -78,6 +79,8 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
                return css_next_descendant_pre(NULL, p->start_css);
        else if (p->order == BPF_CGROUP_ITER_DESCENDANTS_POST)
                return css_next_descendant_post(NULL, p->start_css);
+       else if (p->order == BPF_CGROUP_ITER_CHILDREN)
+               return css_next_child(NULL, p->start_css);
        else /* BPF_CGROUP_ITER_SELF_ONLY and BPF_CGROUP_ITER_ANCESTORS_UP */
                return p->start_css;
 }
@@ -113,6 +116,8 @@ static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
                return css_next_descendant_post(curr, p->start_css);
        else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP)
                return curr->parent;
+       else if (p->order == BPF_CGROUP_ITER_CHILDREN)
+               return css_next_child(curr, p->start_css);
        else  /* BPF_CGROUP_ITER_SELF_ONLY */
                return NULL;
 }
@@ -200,11 +205,16 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
        int order = linfo->cgroup.order;
        struct cgroup *cgrp;
 
-       if (order != BPF_CGROUP_ITER_DESCENDANTS_PRE &&
-           order != BPF_CGROUP_ITER_DESCENDANTS_POST &&
-           order != BPF_CGROUP_ITER_ANCESTORS_UP &&
-           order != BPF_CGROUP_ITER_SELF_ONLY)
+       switch (order) {
+       case BPF_CGROUP_ITER_DESCENDANTS_PRE:
+       case BPF_CGROUP_ITER_DESCENDANTS_POST:
+       case BPF_CGROUP_ITER_ANCESTORS_UP:
+       case BPF_CGROUP_ITER_SELF_ONLY:
+       case BPF_CGROUP_ITER_CHILDREN:
+               break;
+       default:
                return -EINVAL;
+       }
 
        if (fd && id)
                return -EINVAL;
@@ -257,6 +267,8 @@ show_order:
                seq_puts(seq, "order: descendants_post\n");
        else if (aux->cgroup.order == BPF_CGROUP_ITER_ANCESTORS_UP)
                seq_puts(seq, "order: ancestors_up\n");
+       else if (aux->cgroup.order == BPF_CGROUP_ITER_CHILDREN)
+               seq_puts(seq, "order: children\n");
        else /* BPF_CGROUP_ITER_SELF_ONLY */
                seq_puts(seq, "order: self_only\n");
 }
@@ -320,6 +332,7 @@ __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it,
        case BPF_CGROUP_ITER_DESCENDANTS_PRE:
        case BPF_CGROUP_ITER_DESCENDANTS_POST:
        case BPF_CGROUP_ITER_ANCESTORS_UP:
+       case BPF_CGROUP_ITER_CHILDREN:
                break;
        default:
                return -EINVAL;
@@ -345,6 +358,9 @@ __bpf_kfunc struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *i
        case BPF_CGROUP_ITER_DESCENDANTS_POST:
                kit->pos = css_next_descendant_post(kit->pos, kit->start);
                break;
+       case BPF_CGROUP_ITER_CHILDREN:
+               kit->pos = css_next_child(kit->pos, kit->start);
+               break;
        case BPF_CGROUP_ITER_ANCESTORS_UP:
                kit->pos = kit->pos ? kit->pos->parent : kit->start;
        }
index 3ca7d76e05f04457ae09d1b09680299f135fa6e4..5e38b4887de6ae59fe0a62511757be2d1019eded 100644 (file)
@@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order {
        BPF_CGROUP_ITER_DESCENDANTS_PRE,        /* walk descendants in pre-order. */
        BPF_CGROUP_ITER_DESCENDANTS_POST,       /* walk descendants in post-order. */
        BPF_CGROUP_ITER_ANCESTORS_UP,           /* walk ancestors upward. */
+       /*
+        * Walks the immediate children of the specified parent
+        * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
+        * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP
+        * the iterator does not include the specified parent as one of the
+        * returned iterator elements.
+        */
+       BPF_CGROUP_ITER_CHILDREN,
 };
 
 union bpf_iter_link_info {