git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bpf: return VMA snapshot from task_vma iterator
author: Puranjay Mohan <puranjay@kernel.org>
Wed, 8 Apr 2026 15:45:37 +0000 (08:45 -0700)
committer: Alexei Starovoitov <ast@kernel.org>
Fri, 10 Apr 2026 19:05:16 +0000 (12:05 -0700)
Holding the per-VMA lock across the BPF program body creates a lock
ordering problem when helpers acquire locks that depend on mmap_lock:

  vm_lock -> i_rwsem -> mmap_lock -> vm_lock

Snapshot the VMA under the per-VMA lock in _next() via memcpy(), then
drop the lock before returning. The BPF program accesses only the
snapshot.

The verifier only trusts vm_mm and vm_file pointers (see
BTF_TYPE_SAFE_TRUSTED_OR_NULL in verifier.c). vm_file is reference-
counted with get_file() under the lock and released via fput() on the
next iteration or in _destroy(). vm_mm is already correct because
lock_vma_under_rcu() verifies vma->vm_mm == mm. All other pointers
are left as-is by memcpy() since the verifier treats them as untrusted.

Fixes: 4ac454682158 ("bpf: Introduce task_vma open-coded iterator kfuncs")
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
Link: https://lore.kernel.org/r/20260408154539.3832150-4-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
kernel/bpf/task_iter.c

index 87e87f18913d9805e81d920cf29d5e5d46e1bc94..e791ae065c39bfa8819ce01000d2c53400edeb75 100644 (file)
@@ -808,7 +808,7 @@ static inline void bpf_iter_mmput_async(struct mm_struct *mm)
 struct bpf_iter_task_vma_kern_data {
        struct task_struct *task;
        struct mm_struct *mm;
-       struct vm_area_struct *locked_vma;
+       struct vm_area_struct snapshot;
        u64 next_addr;
 };
 
@@ -842,7 +842,7 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
 
        /*
         * Reject irqs-disabled contexts including NMI. Operations used
-        * by _next() and _destroy() (vma_end_read, bpf_iter_mmput_async)
+        * by _next() and _destroy() (vma_end_read, fput, bpf_iter_mmput_async)
         * can take spinlocks with IRQs disabled (pi_lock, pool->lock).
         * Running from NMI or from a tracepoint that fires with those
         * locks held could deadlock.
@@ -885,7 +885,7 @@ __bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
                goto err_cleanup_iter;
        }
 
-       kit->data->locked_vma = NULL;
+       kit->data->snapshot.vm_file = NULL;
        kit->data->next_addr = addr;
        return 0;
 
@@ -947,26 +947,45 @@ retry:
        return vma;
 }
 
+static void bpf_iter_task_vma_snapshot_reset(struct vm_area_struct *snap)
+{
+       if (snap->vm_file) {
+               fput(snap->vm_file);
+               snap->vm_file = NULL;
+       }
+}
+
 __bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it)
 {
        struct bpf_iter_task_vma_kern *kit = (void *)it;
-       struct vm_area_struct *vma;
+       struct vm_area_struct *snap, *vma;
 
        if (!kit->data) /* bpf_iter_task_vma_new failed */
                return NULL;
 
-       if (kit->data->locked_vma) {
-               vma_end_read(kit->data->locked_vma);
-               kit->data->locked_vma = NULL;
-       }
+       snap = &kit->data->snapshot;
+
+       bpf_iter_task_vma_snapshot_reset(snap);
 
        vma = bpf_iter_task_vma_find_next(kit->data);
        if (!vma)
                return NULL;
 
-       kit->data->locked_vma = vma;
+       memcpy(snap, vma, sizeof(*snap));
+
+       /*
+        * The verifier only trusts vm_mm and vm_file (see
+        * BTF_TYPE_SAFE_TRUSTED_OR_NULL in verifier.c). Take a reference
+        * on vm_file; vm_mm is already correct because lock_vma_under_rcu()
+        * verifies vma->vm_mm == mm. All other pointers are untrusted by
+        * the verifier and left as-is.
+        */
+       if (snap->vm_file)
+               get_file(snap->vm_file);
+
        kit->data->next_addr = vma->vm_end;
-       return vma;
+       vma_end_read(vma);
+       return snap;
 }
 
 __bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
@@ -974,8 +993,7 @@ __bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
        struct bpf_iter_task_vma_kern *kit = (void *)it;
 
        if (kit->data) {
-               if (kit->data->locked_vma)
-                       vma_end_read(kit->data->locked_vma);
+               bpf_iter_task_vma_snapshot_reset(&kit->data->snapshot);
                put_task_struct(kit->data->task);
                bpf_iter_mmput_async(kit->data->mm);
                bpf_mem_free(&bpf_global_ma, kit->data);