]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
bpf: Take mmap_lock in zap_pages()
authorAlexei Starovoitov <ast@kernel.org>
Thu, 28 May 2026 22:20:14 +0000 (15:20 -0700)
committerAlexei Starovoitov <ast@kernel.org>
Thu, 4 Jun 2026 16:46:54 +0000 (09:46 -0700)
zap_vma_range() requires the owning mm's mmap_lock to be held.

Taking mmap_read_lock under arena->lock would AB-BA against
arena_vm_close() and arena_map_mmap(), both of which run with
mmap_write_lock held and then acquire arena->lock. Instead drop
arena->lock, mmget_not_zero() the vma's mm, take mmap_read_lock, and
re-resolve the vma via find_vma() since it may have been unmapped or
replaced while waiting.

Track processed vmls with a per-call generation in vml->zap_gen and
serialize zap_pages() callers with a new arena->zap_mutex so
concurrent callers on different uaddr ranges do not mark each other's
vmls processed before the zap is done.

Reported-by: David Hildenbrand <david@kernel.org>
Fixes: 317460317a02 ("bpf: Introduce bpf_arena.")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Link: https://lore.kernel.org/r/20260528222014.38980-1-alexei.starovoitov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
kernel/bpf/arena.c

index 1727503b25d8441a1f81796f6701f082d1a8e899..9b2dea229b3853d75d2fbfac99be8b8e42713b09 100644 (file)
@@ -60,6 +60,8 @@ struct bpf_arena {
        struct list_head vma_list;
        /* protects vma_list */
        struct mutex lock;
+       u64 zap_gen;
+       struct mutex zap_mutex;
        struct irq_work     free_irq;
        struct work_struct  free_work;
        struct llist_head   free_spans;
@@ -289,6 +291,7 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
        if (err)
                goto err_free_scratch;
        mutex_init(&arena->lock);
+       mutex_init(&arena->zap_mutex);
        raw_res_spin_lock_init(&arena->spinlock);
        err = populate_pgtable_except_pte(arena);
        if (err)
@@ -391,6 +394,7 @@ struct vma_list {
        struct vm_area_struct *vma;
        struct list_head head;
        refcount_t mmap_count;
+       u64 zap_gen;
 };
 
 static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
@@ -403,6 +407,7 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
        refcount_set(&vml->mmap_count, 1);
        vma->vm_private_data = vml;
        vml->vma = vma;
+       vml->zap_gen = 0;
        list_add(&vml->head, &arena->vma_list);
        return 0;
 }
@@ -746,12 +751,60 @@ out_free_pages:
  */
 static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 {
+       unsigned long size = (unsigned long)page_cnt << PAGE_SHIFT;
+       struct vm_area_struct *vma;
+       struct mm_struct *mm;
        struct vma_list *vml;
+       unsigned long vm_start;
+       u64 my_gen;
 
-       guard(mutex)(&arena->lock);
-       /* iterate link list under lock */
-       list_for_each_entry(vml, &arena->vma_list, head)
-               zap_vma_range(vml->vma, uaddr, PAGE_SIZE * page_cnt);
+       /*
+        * Taking mmap_read_lock() under arena->lock would deadlock against
+        * arena_vm_close(), which runs with mmap_write_lock held and then
+        * acquires arena->lock. Drop arena->lock for mmap_read_lock().
+        *
+        * Use per-call my_gen, recorded in vml->zap_gen, to remember which
+        * vmls this invocation has already processed across the lock drop.
+        * Hold zap_mutex around the whole walk so concurrent zap_pages()
+        * callers cannot overwrite each other's marks on shared vmls --
+        * otherwise call B's mark would make call A skip a vml that A has
+        * not yet zapped for A's uaddr range.
+        */
+       mutex_lock(&arena->zap_mutex);
+       mutex_lock(&arena->lock);
+       my_gen = ++arena->zap_gen;
+       for (;;) {
+               mm = NULL;
+               list_for_each_entry(vml, &arena->vma_list, head) {
+                       if (vml->zap_gen >= my_gen)
+                               continue;
+                       vml->zap_gen = my_gen;
+                       if (!mmget_not_zero(vml->vma->vm_mm))
+                               continue;
+                       mm = vml->vma->vm_mm;
+                       vm_start = vml->vma->vm_start;
+                       break;
+               }
+               if (!mm)
+                       break;
+               mutex_unlock(&arena->lock);
+
+               mmap_read_lock(mm);
+               /*
+                * Re-resolve: while we waited the VMA could have been unmapped
+                * and a different mapping installed at the same address.
+                */
+               vma = find_vma(mm, vm_start);
+               if (vma && vma->vm_start == vm_start &&
+                   vma->vm_file && vma->vm_file->private_data == &arena->map)
+                       zap_vma_range(vma, uaddr, size);
+               mmap_read_unlock(mm);
+               mmput(mm);
+
+               mutex_lock(&arena->lock);
+       }
+       mutex_unlock(&arena->lock);
+       mutex_unlock(&arena->zap_mutex);
 }
 
 static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt, bool sleepable)