perf/core: Fix deadlock in perf_mmap() failure path
author    Peter Zijlstra <peterz@infradead.org>
          Thu, 26 Mar 2026 11:28:21 +0000 (12:28 +0100)
committer Peter Zijlstra <peterz@infradead.org>
          Tue, 5 May 2026 10:47:20 +0000 (12:47 +0200)
Ian noted that commit 77de62ad3de3 ("perf/core: Fix refcount bug and
potential UAF in perf_mmap") would cause a deadlock due to
event->mmap_mutex recursion.

This happens because we're now calling perf_mmap_close() under
mmap_mutex, while that function itself can also take mmap_mutex.
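
[Editor's illustration, not part of the patch: the kernel's struct mutex, like a
default POSIX mutex, is not recursive, so a function that takes
event->mmap_mutex must never be called while the caller already holds it. A
minimal userspace analogue of the resulting self-deadlock, using an
error-checking pthread mutex so the recursion is reported instead of hanging;
build with "cc -pthread".]

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t m;

/* Stand-in for perf_mmap_close(): takes the same lock again. */
static void close_path(void)
{
        int err = pthread_mutex_lock(&m);

        if (err)
                printf("recursive lock attempt: %s\n", strerror(err));
        else
                pthread_mutex_unlock(&m);
}

int main(void)
{
        pthread_mutexattr_t attr;

        /* ERRORCHECK makes the recursion visible; a normal mutex would hang. */
        pthread_mutexattr_init(&attr);
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
        pthread_mutex_init(&m, &attr);

        pthread_mutex_lock(&m);         /* stand-in for perf_mmap() holding mmap_mutex */
        close_path();                   /* reports EDEADLK instead of deadlocking */
        pthread_mutex_unlock(&m);
        return 0;
}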

Solve this by noting that perf_mmap_close() is far more complicated
than we need at this point, since it deals with scenarios that cannot
happen in this particular case.

Replace the call to perf_mmap_close() with a very narrow undo for the
first-exposure case. If this is not the first mmap(), there is no race
and it is fine to drop the lock and call perf_mmap_close() to handle
the more complicated scenarios.
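
[Editor's note: schematically, the resulting error handling in perf_mmap()
looks as follows. This is a condensed sketch of the kernel/events/core.c hunk
below; the event_unmapped callback and the exact point where event->mmap_mutex
is released are elided.]

        ret = map_range(event->rb, vma);
        if (likely(!ret))
                return 0;

        /* Error path, still under event->mmap_mutex. */
        if (refcount_read(&event->mmap_count) == 1) {
                /* First exposure: minimal, race-free undo. */
                perf_mmap_unaccount(vma, event->rb);
                ring_buffer_attach(event, NULL);        /* drops last rb->refcount */
                refcount_set(&event->mmap_count, 0);
                return ret;
        }

        /* Pre-existing buffer: drop the lock and take the general path. */
        perf_mmap_close(vma);
        return ret;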

Note: move the rb->mmap_user (namespace) handling into the rb
init/free code such that it does not complicate the mmap handling.
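
[Editor's note: concretely, rb->mmap_user and rb->mmap_count now follow the
buffer's own lifetime; condensed from the kernel/events/ring_buffer.c and
kernel/events/internal.h hunks below.]

        /* ring_buffer_init(): acquire along with the buffer ... */
        rb->mmap_user = get_current_user();
        refcount_set(&rb->mmap_count, 1);

        /* ... rb_free_rcu(): release when the buffer is freed. */
        free_uid(rb->mmap_user);
        rb_free(rb);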

Fixes: 77de62ad3de3 ("perf/core: Fix refcount bug and potential UAF in perf_mmap")
Reported-by: Ian Rogers <irogers@google.com>
Closes: https://patch.msgid.link/CAP-5%3DfVJyVMZw%3DDqP53Kxg58nUmJ_0bxoaeOKAbC03BVc11HaA%40mail.gmail.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260326112821.GK3738786@noisy.programming.kicks-ass.net
kernel/events/core.c
kernel/events/internal.h
kernel/events/ring_buffer.c

index 6d1f8bad7e1c521023bd151fdf941871bf10890a..7935d5663944ee1cbaf38cf8018c3347635e8d31 100644 (file)
@@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);
 
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
@@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
        mapped_f unmapped = get_mapped(event, event_unmapped);
        struct perf_buffer *rb = ring_buffer_get(event);
        struct user_struct *mmap_user = rb->mmap_user;
-       int mmap_locked = rb->mmap_locked;
-       unsigned long size = perf_data_size(rb);
        bool detach_rest = false;
 
        /* FIXIES vs perf_pmu_unregister() */
@@ -7117,11 +7116,7 @@ again:
         * Aside from that, this buffer is 'fully' detached and unmapped,
         * undo the VM accounting.
         */
-
-       atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
-                       &mmap_user->locked_vm);
-       atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
-       free_uid(mmap_user);
+       perf_mmap_unaccount(vma, rb);
 
 out_put:
        ring_buffer_put(rb); /* could be last */
@@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
        atomic64_add(extra, &vma->vm_mm->pinned_vm);
 }
 
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
+{
+       struct user_struct *user = rb->mmap_user;
+
+       atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
+                       &user->locked_vm);
+       atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
+}
+
 static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
                        unsigned long nr_pages)
 {
@@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
        if (!rb)
                return -ENOMEM;
 
-       refcount_set(&rb->mmap_count, 1);
-       rb->mmap_user = get_current_user();
        rb->mmap_locked = extra;
 
        ring_buffer_attach(event, rb);
@@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                        mapped(event, vma->vm_mm);
 
                /*
-                * Try to map it into the page table. On fail, invoke
-                * perf_mmap_close() to undo the above, as the callsite expects
-                * full cleanup in this case and therefore does not invoke
-                * vmops::close().
+                * Try to map it into the page table. On fail undo the above,
+                * as the callsite expects full cleanup in this case and
+                * therefore does not invoke vmops::close().
                 */
                ret = map_range(event->rb, vma);
-               if (ret)
-                       perf_mmap_close(vma);
+               if (likely(!ret))
+                       return 0;
+
+               /* Error path */
+
+               /*
+                * If this is the first mmap(), then event->mmap_count should
+                * be stable at 1. It is only modified by:
+                * perf_mmap_{open,close}() and perf_mmap().
+                *
+                * The former are not possible because this mmap() hasn't been
+                * successful yet, and the latter is serialized by
+                * event->mmap_mutex which we still hold (note that mmap_lock
+                * is not strictly sufficient here, because the event fd can
+                * be passed to another process through trivial means like
+                * fork(), leading to concurrent mmap() from different mm).
+                *
+                * Make sure to remove event->rb before releasing
+                * event->mmap_mutex, such that any concurrent mmap() will not
+                * attempt to use this failed buffer.
+                */
+               if (refcount_read(&event->mmap_count) == 1) {
+                       /*
+                        * Minimal perf_mmap_close(); there can't be AUX or
+                        * other events on account of this being the first.
+                        */
+                       mapped = get_mapped(event, event_unmapped);
+                       if (mapped)
+                               mapped(event, vma->vm_mm);
+                       perf_mmap_unaccount(vma, event->rb);
+                       ring_buffer_attach(event, NULL);        /* drops last rb->refcount */
+                       refcount_set(&event->mmap_count, 0);
+                       return ret;
+               }
+
+               /*
+                * Otherwise this is an already existing buffer, and there is
+                * no race vs first exposure, so fall-through and call
+                * perf_mmap_close().
+                */
        }
 
+       perf_mmap_close(vma);
        return ret;
 }
 
index d9cc570830918472e7bd2ca9576fa8fc516a22b5..c03c4f2eea571872c4ac6c451de90228e985ab27 100644 (file)
@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
        struct perf_buffer *rb;
 
        rb = container_of(rcu_head, struct perf_buffer, rcu_head);
+       free_uid(rb->mmap_user);
        rb_free(rb);
 }
 
index 3e7de266141729bb966e7d61c31f9de00ad3a1a9..9fe92161715e0987520cddb873dd162fd2a77d5c 100644 (file)
@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
                rb->paused = 1;
 
        mutex_init(&rb->aux_mutex);
+       rb->mmap_user = get_current_user();
+       refcount_set(&rb->mmap_count, 1);
 }
 
 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)