bpf: Allow pre-ordering for bpf cgroup progs

author Yonghong Song <yonghong.song@linux.dev>

Mon, 24 Feb 2025 23:01:16 +0000 (15:01 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 4 Jun 2025 12:41:58 +0000 (14:41 +0200)
author Yonghong Song <yonghong.song@linux.dev>
Mon, 24 Feb 2025 23:01:16 +0000 (15:01 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 4 Jun 2025 12:41:58 +0000 (14:41 +0200)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h

index d4f2c8706042cd2e079775a1fa643cfc7793bfba..2331cd8174fe3f6641d22d6edc666d73cf8c357f 100644 (file)
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -106,6 +106,7 @@ struct bpf_prog_list {
         struct bpf_prog *prog;
         struct bpf_cgroup_link *link;
         struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+       u32 flags;
  };
  
  int cgroup_bpf_inherit(struct cgroup *cgrp);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 431bc700bcfb93629c660043dcfcb05b7f7dd3c1..c7f904a72af2178343826f220b03610482bfa13f 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1140,6 +1140,7 @@ enum bpf_perf_event_type {
  #define BPF_F_BEFORE           (1U << 3)
  #define BPF_F_AFTER            (1U << 4)
  #define BPF_F_ID               (1U << 5)
+#define BPF_F_PREORDER         (1U << 6)
  #define BPF_F_LINK             BPF_F_LINK /* 1 << 13 */
  
  /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c

index cf2eb0895d403c967d63236a0984f15a67ea8b4e..684fb450ad086f9f446daca29f3326f9b0a211a3 100644 (file)
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -369,7 +369,7 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
  /* count number of elements in the list.
   * it's slow but the list cannot be long
   */
-static u32 prog_list_length(struct hlist_head *head)
+static u32 prog_list_length(struct hlist_head *head, int *preorder_cnt)
  {
         struct bpf_prog_list *pl;
         u32 cnt = 0;
@@ -377,6 +377,8 @@ static u32 prog_list_length(struct hlist_head *head)
         hlist_for_each_entry(pl, head, node) {
                 if (!prog_list_prog(pl))
                         continue;
+               if (preorder_cnt && (pl->flags & BPF_F_PREORDER))
+                       (*preorder_cnt)++;
                 cnt++;
         }
         return cnt;
@@ -400,7 +402,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
  
                 if (flags & BPF_F_ALLOW_MULTI)
                         return true;
-               cnt = prog_list_length(&p->bpf.progs[atype]);
+               cnt = prog_list_length(&p->bpf.progs[atype], NULL);
                 WARN_ON_ONCE(cnt > 1);
                 if (cnt == 1)
                         return !!(flags & BPF_F_ALLOW_OVERRIDE);
@@ -423,12 +425,12 @@ static int compute_effective_progs(struct cgroup *cgrp,
         struct bpf_prog_array *progs;
         struct bpf_prog_list *pl;
         struct cgroup *p = cgrp;
-       int cnt = 0;
+       int i, j, cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart;
  
         /* count number of effective programs by walking parents */
         do {
                 if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
-                       cnt += prog_list_length(&p->bpf.progs[atype]);
+                       cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt);
                 p = cgroup_parent(p);
         } while (p);
  
@@ -439,20 +441,34 @@ static int compute_effective_progs(struct cgroup *cgrp,
         /* populate the array with effective progs */
         cnt = 0;
         p = cgrp;
+       fstart = preorder_cnt;
+       bstart = preorder_cnt - 1;
         do {
                 if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
                         continue;
  
+               init_bstart = bstart;
                 hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
                         if (!prog_list_prog(pl))
                                 continue;
  
-                       item = &progs->items[cnt];
+                       if (pl->flags & BPF_F_PREORDER) {
+                               item = &progs->items[bstart];
+                               bstart--;
+                       } else {
+                               item = &progs->items[fstart];
+                               fstart++;
+                       }
                         item->prog = prog_list_prog(pl);
                         bpf_cgroup_storages_assign(item->cgroup_storage,
                                                    pl->storage);
                         cnt++;
                 }
+
+               /* reverse pre-ordering progs at this cgroup level */
+               for (i = bstart + 1, j = init_bstart; i < j; i++, j--)
+                       swap(progs->items[i], progs->items[j]);
+
         } while ((p = cgroup_parent(p)));
  
         *array = progs;
@@ -663,7 +679,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
                  */
                 return -EPERM;
  
-       if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
+       if (prog_list_length(progs, NULL) >= BPF_CGROUP_MAX_PROGS)
                 return -E2BIG;
  
         pl = find_attach_entry(progs, prog, link, replace_prog,
@@ -698,6 +714,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
  
         pl->prog = prog;
         pl->link = link;
+       pl->flags = flags;
         bpf_cgroup_storages_assign(pl->storage, storage);
         cgrp->bpf.flags[atype] = saved_flags;
  
@@ -1073,7 +1090,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                                                               lockdep_is_held(&cgroup_mutex));
                         total_cnt += bpf_prog_array_length(effective);
                 } else {
-                       total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
+                       total_cnt += prog_list_length(&cgrp->bpf.progs[atype], NULL);
                 }
         }
  
@@ -1105,7 +1122,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                         u32 id;
  
                         progs = &cgrp->bpf.progs[atype];
-                       cnt = min_t(int, prog_list_length(progs), total_cnt);
+                       cnt = min_t(int, prog_list_length(progs, NULL), total_cnt);
                         i = 0;
                         hlist_for_each_entry(pl, progs, node) {
                                 prog = prog_list_prog(pl);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index 5a8c5a4ef1134d7f49b59647b7d928556632eb02..b66349f892f25ea9096c8694c3390d0824e5ac3f 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3900,7 +3900,8 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
  #define BPF_F_ATTACH_MASK_BASE \
         (BPF_F_ALLOW_OVERRIDE | \
          BPF_F_ALLOW_MULTI |    \
-        BPF_F_REPLACE)
+        BPF_F_REPLACE |        \
+        BPF_F_PREORDER)
  
  #define BPF_F_ATTACH_MASK_MPROG        \
         (BPF_F_REPLACE |        \
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index 977ec094bc2a6c75dd8d37d037ff89a39c8d3492..2a90f04a4160db9b6203b694c0f5ae1841636333 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1140,6 +1140,7 @@ enum bpf_perf_event_type {
  #define BPF_F_BEFORE           (1U << 3)
  #define BPF_F_AFTER            (1U << 4)
  #define BPF_F_ID               (1U << 5)
+#define BPF_F_PREORDER         (1U << 6)
  #define BPF_F_LINK             BPF_F_LINK /* 1 << 13 */
  
  /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
author	Yonghong Song <yonghong.song@linux.dev>
	Mon, 24 Feb 2025 23:01:16 +0000 (15:01 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 4 Jun 2025 12:41:58 +0000 (14:41 +0200)
include/linux/bpf-cgroup.h		patch | blob | blame | history
include/uapi/linux/bpf.h		patch | blob | blame | history
kernel/bpf/cgroup.c		patch | blob | blame | history
kernel/bpf/syscall.c		patch | blob | blame | history
tools/include/uapi/linux/bpf.h		patch | blob | blame | history