]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
net/mlx5e: Fix race between DIM disable and net_dim()
authorCarolina Jubran <cjubran@nvidia.com>
Thu, 10 Jul 2025 13:53:43 +0000 (16:53 +0300)
committerJakub Kicinski <kuba@kernel.org>
Fri, 11 Jul 2025 14:26:44 +0000 (07:26 -0700)
There's a race between disabling DIM and NAPI callbacks using the dim
pointer on the RQ or SQ.

If NAPI checks the DIM state bit and sees it still set, it assumes
`rq->dim` or `sq->dim` is valid. But if DIM gets disabled right after
that check, the pointer might already be set to NULL, leading to a NULL
pointer dereference in net_dim().

Fix this by calling `synchronize_net()` before freeing the DIM context.
This ensures all in-progress NAPI callbacks are finished before the
pointer is cleared.

Kernel log:

BUG: kernel NULL pointer dereference, address: 0000000000000000
...
RIP: 0010:net_dim+0x23/0x190
...
Call Trace:
 <TASK>
 ? __die+0x20/0x60
 ? page_fault_oops+0x150/0x3e0
 ? common_interrupt+0xf/0xa0
 ? sysvec_call_function_single+0xb/0x90
 ? exc_page_fault+0x74/0x130
 ? asm_exc_page_fault+0x22/0x30
 ? net_dim+0x23/0x190
 ? mlx5e_poll_ico_cq+0x41/0x6f0 [mlx5_core]
 ? sysvec_apic_timer_interrupt+0xb/0x90
 mlx5e_handle_rx_dim+0x92/0xd0 [mlx5_core]
 mlx5e_napi_poll+0x2cd/0xac0 [mlx5_core]
 ? mlx5e_poll_ico_cq+0xe5/0x6f0 [mlx5_core]
 busy_poll_stop+0xa2/0x200
 ? mlx5e_napi_poll+0x1d9/0xac0 [mlx5_core]
 ? mlx5e_trigger_irq+0x130/0x130 [mlx5_core]
 __napi_busy_loop+0x345/0x3b0
 ? sysvec_call_function_single+0xb/0x90
 ? asm_sysvec_call_function_single+0x16/0x20
 ? sysvec_apic_timer_interrupt+0xb/0x90
 ? pcpu_free_area+0x1e4/0x2e0
 napi_busy_loop+0x11/0x20
 xsk_recvmsg+0x10c/0x130
 sock_recvmsg+0x44/0x70
 __sys_recvfrom+0xbc/0x130
 ? __schedule+0x398/0x890
 __x64_sys_recvfrom+0x20/0x30
 do_syscall_64+0x4c/0x100
 entry_SYSCALL_64_after_hwframe+0x4b/0x53
...
---[ end trace 0000000000000000 ]---
...
---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---

Fixes: 445a25f6e1a2 ("net/mlx5e: Support updating coalescing configuration without resetting channels")
Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://patch.msgid.link/1752155624-24095-3-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/en_dim.c

index 298bb74ec5e9425490eea9825fc9fbc939d2f83c..d1d629697e285f157416a0bc932528167a231481 100644 (file)
@@ -113,7 +113,7 @@ int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable)
                __set_bit(MLX5E_RQ_STATE_DIM, &rq->state);
        } else {
                __clear_bit(MLX5E_RQ_STATE_DIM, &rq->state);
-
+               synchronize_net();
                mlx5e_dim_disable(rq->dim);
                rq->dim = NULL;
        }
@@ -140,7 +140,7 @@ int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable)
                __set_bit(MLX5E_SQ_STATE_DIM, &sq->state);
        } else {
                __clear_bit(MLX5E_SQ_STATE_DIM, &sq->state);
-
+               synchronize_net();
                mlx5e_dim_disable(sq->dim);
                sq->dim = NULL;
        }