]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
page_pool: fix memory-provider leak in page_pool_create_percpu() error path
authorHasan Basbunar <basbunarhasan@gmail.com>
Tue, 28 Apr 2026 17:07:39 +0000 (19:07 +0200)
committerJakub Kicinski <kuba@kernel.org>
Thu, 30 Apr 2026 02:11:49 +0000 (19:11 -0700)
When page_pool_create_percpu() fails on page_pool_list(), it falls
through to its err_uninit: label, which calls page_pool_uninit().
At that point page_pool_init() has already taken two references
when the user requested PP_FLAG_ALLOW_UNREADABLE_NETMEM:

pool->mp_ops->init(pool)
static_branch_inc(&page_pool_mem_providers);

Neither is undone by page_pool_uninit(); both are only undone by
__page_pool_destroy() (success-side teardown). The error path
therefore leaks the per-provider reference taken by mp_ops->init
(io_zcrx_ifq->refs in the io_uring zcrx provider, the dmabuf
binding refcount in the devmem provider) plus one increment of
the page_pool_mem_providers static branch on every failure of
xa_alloc_cyclic() inside page_pool_list().

The leaked io_zcrx_ifq->refs in turn pins everything
io_zcrx_ifq_free() would release on cleanup: ifq->user (uid),
ifq->mm_account (mmdrop), ifq->dev (device refcount),
ifq->netdev_tracker (netdev refcount), and the rbuf region.
The leaked static branch increment forces all subsequent
page_pool_alloc_netmems() and page_pool_return_page() callers to
take the slow mp_ops branch for the lifetime of the kernel.

Reachable via the io_uring zcrx path:

io_uring_register(IORING_REGISTER_ZCRX_IFQ)  /* CAP_NET_ADMIN */
  -> __io_uring_register
  -> io_register_zcrx
  -> zcrx_register_netdev
  -> netif_mp_open_rxq
  -> driver ndo_queue_mem_alloc
  -> page_pool_create_percpu
    -> page_pool_init succeeds (mp_ops->init runs, branch++)
    -> page_pool_list fails (xa_alloc_cyclic -ENOMEM)
    -> goto err_uninit         <-- leak

The same shape applies to the devmem dmabuf provider via
mp_dmabuf_devmem_init()/mp_dmabuf_devmem_destroy().

Restore the cleanup symmetry by moving the mp_ops->destroy() and
static_branch_dec() calls out of __page_pool_destroy() and into
page_pool_uninit(), so page_pool_uninit() is again the strict
inverse of page_pool_init(). page_pool_uninit() has only two
callers (the err_uninit: path and __page_pool_destroy()), so this
preserves the single-call invariant on the success path while
fixing the err path. The error path of page_pool_init() itself
still skips the mp_ops cleanup correctly: mp_ops->init is the
last action that takes a reference before page_pool_init() returns
0, so when it returns an error neither the refcount nor the static
branch has been touched.

Triggering the bug requires xa_alloc_cyclic() to fail with -ENOMEM,
which under normal GFP_KERNEL retry behaviour is rare. It is
deterministic under CONFIG_FAULT_INJECTION with fail_page_alloc /
xa fault injection, or under sustained memory pressure. The leak
is silent: there is no warning, and the released kernel build
continues running with a permanently-incremented static branch.

Fixes: 0f9214046893 ("memory-provider: dmabuf devmem memory provider")
Signed-off-by: Hasan Basbunar <basbunarhasan@gmail.com>
Link: https://patch.msgid.link/20260428170739.34881-1-basbunarhasan@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/core/page_pool.c

index 877bbf7a19389c028d51fe2887579d786a8f51d7..6e576dec80db42ebf5434566db17e46a56b3bf08 100644 (file)
@@ -327,6 +327,11 @@ static void page_pool_uninit(struct page_pool *pool)
        if (!pool->system)
                free_percpu(pool->recycle_stats);
 #endif
+
+       if (pool->mp_ops) {
+               pool->mp_ops->destroy(pool);
+               static_branch_dec(&page_pool_mem_providers);
+       }
 }
 
 /**
@@ -1146,11 +1151,6 @@ static void __page_pool_destroy(struct page_pool *pool)
        page_pool_unlist(pool);
        page_pool_uninit(pool);
 
-       if (pool->mp_ops) {
-               pool->mp_ops->destroy(pool);
-               static_branch_dec(&page_pool_mem_providers);
-       }
-
        kfree(pool);
 }