bpf: Replace bpf_map_kmalloc_node() with kmalloc_nolock() to allocate bpf_async_cb structures.

author Alexei Starovoitov <ast@kernel.org>

Wed, 15 Oct 2025 00:07:00 +0000 (17:07 -0700)

committer Daniel Borkmann <daniel@iogearbox.net>

Wed, 15 Oct 2025 10:22:22 +0000 (12:22 +0200)
author Alexei Starovoitov <ast@kernel.org>
Wed, 15 Oct 2025 00:07:00 +0000 (17:07 -0700)
committer Daniel Borkmann <daniel@iogearbox.net>
Wed, 15 Oct 2025 10:22:22 +0000 (12:22 +0200)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index a98c83346134742f2ace10c4240f473065e7855b..d808253f2e945dd7f0c0008743b11450416445db 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2499,6 +2499,8 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
  #ifdef CONFIG_MEMCG
  void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
                            int node);
+void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
+                            int node);
  void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
  void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
                        gfp_t flags);
@@ -2511,6 +2513,8 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
   */
  #define bpf_map_kmalloc_node(_map, _size, _flags, _node)       \
                 kmalloc_node(_size, _flags, _node)
+#define bpf_map_kmalloc_nolock(_map, _size, _flags, _node)     \
+               kmalloc_nolock(_size, _flags, _node)
  #define bpf_map_kzalloc(_map, _size, _flags)                   \
                 kzalloc(_size, _flags)
  #define bpf_map_kvcalloc(_map, _n, _size, _flags)              \
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c

index c9fab9a356dfc18b0eb7fcfd435c9c9372add77d..8eb117c5281769d0d5f9b37a9aed70e0e7147077 100644 (file)
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1215,13 +1215,20 @@ static void bpf_wq_work(struct work_struct *work)
         rcu_read_unlock_trace();
  }
  
+static void bpf_async_cb_rcu_free(struct rcu_head *rcu)
+{
+       struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu);
+
+       kfree_nolock(cb);
+}
+
  static void bpf_wq_delete_work(struct work_struct *work)
  {
         struct bpf_work *w = container_of(work, struct bpf_work, delete_work);
  
         cancel_work_sync(&w->work);
  
-       kfree_rcu(w, cb.rcu);
+       call_rcu(&w->cb.rcu, bpf_async_cb_rcu_free);
  }
  
  static void bpf_timer_delete_work(struct work_struct *work)
@@ -1230,13 +1237,13 @@ static void bpf_timer_delete_work(struct work_struct *work)
  
         /* Cancel the timer and wait for callback to complete if it was running.
          * If hrtimer_cancel() can be safely called it's safe to call
-        * kfree_rcu(t) right after for both preallocated and non-preallocated
+        * call_rcu() right after for both preallocated and non-preallocated
          * maps.  The async->cb = NULL was already done and no code path can see
          * address 't' anymore. Timer if armed for existing bpf_hrtimer before
          * bpf_timer_cancel_and_free will have been cancelled.
          */
         hrtimer_cancel(&t->timer);
-       kfree_rcu(t, cb.rcu);
+       call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free);
  }
  
  static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
@@ -1270,11 +1277,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
                 goto out;
         }
  
-       /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until
-        * kmalloc_nolock() is available, avoid locking issues by using
-        * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM).
-        */
-       cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node);
+       cb = bpf_map_kmalloc_nolock(map, size, 0, map->numa_node);
         if (!cb) {
                 ret = -ENOMEM;
                 goto out;
@@ -1315,7 +1318,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
                  * or pinned in bpffs.
                  */
                 WRITE_ONCE(async->cb, NULL);
-               kfree(cb);
+               kfree_nolock(cb);
                 ret = -EPERM;
         }
  out:
@@ -1580,7 +1583,7 @@ void bpf_timer_cancel_and_free(void *val)
          * timer _before_ calling us, such that failing to cancel it here will
          * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
          * Therefore, we _need_ to cancel any outstanding timers before we do
-        * kfree_rcu, even though no more timers can be armed.
+        * call_rcu, even though no more timers can be armed.
          *
          * Moreover, we need to schedule work even if timer does not belong to
          * the calling callback_fn, as on two different CPUs, we can end up in a
@@ -1607,7 +1610,7 @@ void bpf_timer_cancel_and_free(void *val)
                  * completion.
                  */
                 if (hrtimer_try_to_cancel(&t->timer) >= 0)
-                       kfree_rcu(t, cb.rcu);
+                       call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free);
                 else
                         queue_work(system_dfl_wq, &t->cb.delete_work);
         } else {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index 2a9456a3e73049aad1d9f84efde4a8aef6914aca..8a129746bd6cc77c3eea8ce5abd500f99a81326c 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -520,6 +520,21 @@ void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
         return ptr;
  }
  
+void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
+                            int node)
+{
+       struct mem_cgroup *memcg, *old_memcg;
+       void *ptr;
+
+       memcg = bpf_map_get_memcg(map);
+       old_memcg = set_active_memcg(memcg);
+       ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
+       set_active_memcg(old_memcg);
+       mem_cgroup_put(memcg);
+
+       return ptr;
+}
+
  void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
  {
         struct mem_cgroup *memcg, *old_memcg;
author	Alexei Starovoitov <ast@kernel.org>
	Wed, 15 Oct 2025 00:07:00 +0000 (17:07 -0700)
committer	Daniel Borkmann <daniel@iogearbox.net>
	Wed, 15 Oct 2025 10:22:22 +0000 (12:22 +0200)
include/linux/bpf.h		patch | blob | blame | history
kernel/bpf/helpers.c		patch | blob | blame | history
kernel/bpf/syscall.c		patch | blob | blame | history