bpf: Allow pre-ordering for bpf cgroup progs

author Yonghong Song <yonghong.song@linux.dev>

Mon, 24 Feb 2025 23:01:16 +0000 (15:01 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 29 May 2025 09:12:41 +0000 (11:12 +0200)
author Yonghong Song <yonghong.song@linux.dev>
Mon, 24 Feb 2025 23:01:16 +0000 (15:01 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 29 May 2025 09:12:41 +0000 (11:12 +0200)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h

index 7fc69083e7450ffe82d10a9022f22400024d864c..9de7adb6829485267b6cf0d19fcddfc8037ffea2 100644 (file)
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -111,6 +111,7 @@ struct bpf_prog_list {
         struct bpf_prog *prog;
         struct bpf_cgroup_link *link;
         struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+       u32 flags;
  };
  
  int cgroup_bpf_inherit(struct cgroup *cgrp);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 2acf9b33637174bd16b1d12ccc6410c5f55a7ea9..89242184a19376da72d7dac0cea0af38b0702b17 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1207,6 +1207,7 @@ enum bpf_perf_event_type {
  #define BPF_F_BEFORE           (1U << 3)
  #define BPF_F_AFTER            (1U << 4)
  #define BPF_F_ID               (1U << 5)
+#define BPF_F_PREORDER         (1U << 6)
  #define BPF_F_LINK             BPF_F_LINK /* 1 << 13 */
  
  /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c

index 46e5db65dbc8d8c6591b53dfc77bb689357f33ea..84f58f3d028a3e215bf9d32aaa941d56c7fb2adf 100644 (file)
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -369,7 +369,7 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
  /* count number of elements in the list.
   * it's slow but the list cannot be long
   */
-static u32 prog_list_length(struct hlist_head *head)
+static u32 prog_list_length(struct hlist_head *head, int *preorder_cnt)
  {
         struct bpf_prog_list *pl;
         u32 cnt = 0;
@@ -377,6 +377,8 @@ static u32 prog_list_length(struct hlist_head *head)
         hlist_for_each_entry(pl, head, node) {
                 if (!prog_list_prog(pl))
                         continue;
+               if (preorder_cnt && (pl->flags & BPF_F_PREORDER))
+                       (*preorder_cnt)++;
                 cnt++;
         }
         return cnt;
@@ -400,7 +402,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
  
                 if (flags & BPF_F_ALLOW_MULTI)
                         return true;
-               cnt = prog_list_length(&p->bpf.progs[atype]);
+               cnt = prog_list_length(&p->bpf.progs[atype], NULL);
                 WARN_ON_ONCE(cnt > 1);
                 if (cnt == 1)
                         return !!(flags & BPF_F_ALLOW_OVERRIDE);
@@ -423,12 +425,12 @@ static int compute_effective_progs(struct cgroup *cgrp,
         struct bpf_prog_array *progs;
         struct bpf_prog_list *pl;
         struct cgroup *p = cgrp;
-       int cnt = 0;
+       int i, j, cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart;
  
         /* count number of effective programs by walking parents */
         do {
                 if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
-                       cnt += prog_list_length(&p->bpf.progs[atype]);
+                       cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt);
                 p = cgroup_parent(p);
         } while (p);
  
@@ -439,20 +441,34 @@ static int compute_effective_progs(struct cgroup *cgrp,
         /* populate the array with effective progs */
         cnt = 0;
         p = cgrp;
+       fstart = preorder_cnt;
+       bstart = preorder_cnt - 1;
         do {
                 if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
                         continue;
  
+               init_bstart = bstart;
                 hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
                         if (!prog_list_prog(pl))
                                 continue;
  
-                       item = &progs->items[cnt];
+                       if (pl->flags & BPF_F_PREORDER) {
+                               item = &progs->items[bstart];
+                               bstart--;
+                       } else {
+                               item = &progs->items[fstart];
+                               fstart++;
+                       }
                         item->prog = prog_list_prog(pl);
                         bpf_cgroup_storages_assign(item->cgroup_storage,
                                                    pl->storage);
                         cnt++;
                 }
+
+               /* reverse pre-ordering progs at this cgroup level */
+               for (i = bstart + 1, j = init_bstart; i < j; i++, j--)
+                       swap(progs->items[i], progs->items[j]);
+
         } while ((p = cgroup_parent(p)));
  
         *array = progs;
@@ -663,7 +679,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
                  */
                 return -EPERM;
  
-       if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
+       if (prog_list_length(progs, NULL) >= BPF_CGROUP_MAX_PROGS)
                 return -E2BIG;
  
         pl = find_attach_entry(progs, prog, link, replace_prog,
@@ -698,6 +714,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
  
         pl->prog = prog;
         pl->link = link;
+       pl->flags = flags;
         bpf_cgroup_storages_assign(pl->storage, storage);
         cgrp->bpf.flags[atype] = saved_flags;
  
@@ -1073,7 +1090,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                                                               lockdep_is_held(&cgroup_mutex));
                         total_cnt += bpf_prog_array_length(effective);
                 } else {
-                       total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
+                       total_cnt += prog_list_length(&cgrp->bpf.progs[atype], NULL);
                 }
         }
  
@@ -1105,7 +1122,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                         u32 id;
  
                         progs = &cgrp->bpf.progs[atype];
-                       cnt = min_t(int, prog_list_length(progs), total_cnt);
+                       cnt = min_t(int, prog_list_length(progs, NULL), total_cnt);
                         i = 0;
                         hlist_for_each_entry(pl, progs, node) {
                                 prog = prog_list_prog(pl);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index 87f886ed33bc3daaf5cd5ec33bcd7c868a74d231..8c42c094f0d1edbe8800644d21d2b3757f358391 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4169,7 +4169,8 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
  #define BPF_F_ATTACH_MASK_BASE \
         (BPF_F_ALLOW_OVERRIDE | \
          BPF_F_ALLOW_MULTI |    \
-        BPF_F_REPLACE)
+        BPF_F_REPLACE |        \
+        BPF_F_PREORDER)
  
  #define BPF_F_ATTACH_MASK_MPROG        \
         (BPF_F_REPLACE |        \
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index 2acf9b33637174bd16b1d12ccc6410c5f55a7ea9..89242184a19376da72d7dac0cea0af38b0702b17 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1207,6 +1207,7 @@ enum bpf_perf_event_type {
  #define BPF_F_BEFORE           (1U << 3)
  #define BPF_F_AFTER            (1U << 4)
  #define BPF_F_ID               (1U << 5)
+#define BPF_F_PREORDER         (1U << 6)
  #define BPF_F_LINK             BPF_F_LINK /* 1 << 13 */
  
  /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
author	Yonghong Song <yonghong.song@linux.dev>
	Mon, 24 Feb 2025 23:01:16 +0000 (15:01 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 29 May 2025 09:12:41 +0000 (11:12 +0200)
include/linux/bpf-cgroup.h		patch | blob | blame | history
include/uapi/linux/bpf.h		patch | blob | blame | history
kernel/bpf/cgroup.c		patch | blob | blame | history
kernel/bpf/syscall.c		patch | blob | blame | history
tools/include/uapi/linux/bpf.h		patch | blob | blame | history