git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
RDMA/rxe: Fix a use-after-free problem in rxe_mmap
author Zhu Yanjun <yanjun.zhu@linux.dev>
Fri, 15 May 2026 00:25:37 +0000 (02:25 +0200)
committer Jason Gunthorpe <jgg@nvidia.com>
Mon, 25 May 2026 14:25:16 +0000 (11:25 -0300)
rxe_mmap() removes a rxe_mmap_info struct from the pending_mmaps list
and releases pending_lock while the struct's kref is still at 1:

   list_del_init(&ip->pending_mmaps);
   spin_unlock_bh(&rxe->pending_lock);   /* ref == 1, no lock held */
   ret = remap_vmalloc_range(vma, ip->obj, 0);  /* walks PTEs */
   [...]
   rxe_vma_open(vma);                    /* kref_get, ref → 2 */
   remap_vmalloc_range_partial() walks PTEs without any lock.

A concurrent DESTROY_CQ ioctl on another CPU calls:

    kref_put(&q->ip->ref, rxe_mmap_release)   /* ref 1→0 */
    vfree(ip->obj)   /* clears vmalloc PTEs mid-walk */
    kfree(ip)        /* frees rxe_mmap_info */

This yields:

   1. Kernel crash, vmalloc_to_page() returns NULL when vfree wins the
   per-PTE race -> vm_insert_page(NULL) → GPF in validate_page_before_insert

   2. Page UAF, vmalloc_to_page() reads a stale PTE before vfree clears
   it. The user VMA then holds a PTE to a freed page, which may later be
   reallocated by vmalloc, giving the attacker a clean page-level UAF.

   It is worth noting that even though a page-level UAF is possible given
   the strong primitive, it is statistically very difficult to achieve
   given the very short time window (after the last insert_page and before
   the kref_get).

The call trace is as follows:

  Oops: general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] SMP KASAN NOPTI
  KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
  CPU: 0 UID: 1000 PID: 413 Comm: poc Not tainted 7.0.0-rc5-dirty #28 PREEMPT(lazy)
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
  RIP: 0010:validate_page_before_insert+0x32/0x300
  Code: e5 41 57 41 56 49 89 fe 41 55 41 54 53 48 89 f3 e8 93 b5 a3 ff 48 8d 7b 08 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 7b 02 00 00 4c 8b 63 08 31 ff 4d 89 e5 41 83 e5
  RSP: 0018:ffff88811b15f2f0 EFLAGS: 00000202
  RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000000000000008
  RBP: ffff88811b15f318 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881181eee00
  R13: 0000000000000000 R14: ffff8881181eee00 R15: ffff8881181eee20
  FS:  00007b1e000f76c0(0000) GS:ffff8884268e0000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007b1e00a24ac0 CR3: 0000000116eb3000 CR4: 00000000000006f0
  Call Trace:
   <TASK>
   insert_page+0x8f/0x190
   ? __pfx_insert_page+0x10/0x10
   ? kasan_save_alloc_info+0x38/0x60
   vm_insert_page+0x2e7/0x400
   remap_vmalloc_range_partial+0x212/0x3e0
   remap_vmalloc_range+0x6e/0xb0
   ? __kasan_check_write+0x14/0x30
   rxe_mmap+0x2e9/0x5d0
   ib_uverbs_mmap+0x1ad/0x2c0
   __mmap_region+0x12c2/0x2ad0
   ? __pfx___mmap_region+0x10/0x10
   ? __sanitizer_cov_trace_switch+0x58/0xb0
   ? mas_prev_slot+0x360/0x39c0
   ? __sanitizer_cov_trace_switch+0x58/0xb0
   ? mas_next_slot+0x1e5b/0x2f40
   ? __sanitizer_cov_trace_cmp8+0x18/0x30
   ? unmapped_area_topdown+0x4dd/0x610
   ? kfree+0x1b1/0x440
   ? free_cpumask_var+0x16/0x30
   ? __kasan_slab_free+0x7d/0xa0
   ? __sanitizer_cov_trace_cmp8+0x18/0x30
   mmap_region+0x2e6/0x3c0
   do_mmap+0xa3e/0x12a0
   ? __pfx_do_mmap+0x10/0x10
   ? __kasan_check_write+0x14/0x30
   ? down_write_killable+0xba/0x160
   ? __pfx_down_write_killable+0x10/0x10
   ? __sanitizer_cov_trace_cmp4+0x16/0x30
   vm_mmap_pgoff+0x2d4/0x4a0
   ? __pfx_vm_mmap_pgoff+0x10/0x10
   ? fget+0x1bf/0x270
   ksys_mmap_pgoff+0x40c/0x690
   ? __sanitizer_cov_trace_const_cmp4+0x16/0x30
   ? __pfx_ksys_mmap_pgoff+0x10/0x10
   ? __kasan_check_write+0x14/0x30
   ? _raw_spin_trylock+0xbb/0x130
   ? __pfx__raw_spin_trylock+0x10/0x10
   __x64_sys_mmap+0x135/0x1e0
   x64_sys_call+0x1c14/0x2790
   do_syscall_64+0xd2/0x1050
   ? rcu_core+0x352/0x7d0
   ? rcu_core_si+0xe/0x20
   ? handle_softirqs+0x1aa/0x650
   ? __sanitizer_cov_trace_cmp4+0x16/0x30
   ? fpregs_assert_state_consistent+0xe1/0x160
   ? irqentry_exit+0xb1/0x670
   entry_SYSCALL_64_after_hwframe+0x76/0x7e

Link: https://patch.msgid.link/r/20260515002537.6209-1-yanjun.zhu@linux.dev
Reported-and-tested-by: nasm <n4sm@protonmail.com>
Suggested-by: nasm <n4sm@protonmail.com>
Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/sw/rxe/rxe_mmap.c

index db380302149e51a76aa1b03c9aae367570e36454..7f723a2f3700595c3e7665835fba25d2b33e4f5f 100644 (file)
@@ -93,18 +93,31 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
        goto done;
 
 found_it:
+       /*
+        * Increment refcount and check whether it is being freed atm while
+        * holding lock to prevent UAF
+        */
+       if (!kref_get_unless_zero(&ip->ref)) {
+               spin_unlock_bh(&rxe->pending_lock);
+               ret = -ENXIO;
+               goto done;
+       }
+
        list_del_init(&ip->pending_mmaps);
        spin_unlock_bh(&rxe->pending_lock);
 
+       vma->vm_ops = &rxe_vm_ops;
+       vma->vm_private_data = ip;
+
        ret = remap_vmalloc_range(vma, ip->obj, 0);
        if (ret) {
+               vma->vm_private_data = NULL;
+               vma->vm_ops = NULL;
+               kref_put(&ip->ref, rxe_mmap_release);
                rxe_dbg_dev(rxe, "err %d from remap_vmalloc_range\n", ret);
                goto done;
        }
 
-       vma->vm_ops = &rxe_vm_ops;
-       vma->vm_private_data = ip;
-       rxe_vma_open(vma);
 done:
        return ret;
 }