]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
net/mlx5: Take lock with IRQs disabled to avoid deadlock
authorMoni Shoua <monis@mellanox.com>
Wed, 26 Dec 2018 17:21:21 +0000 (19:21 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Jan 2020 13:50:15 +0000 (14:50 +0100)
[ Upstream commit 33814e5d127e21f53b52e17b0722c1b57d4f4d29 ]

The lock in qp_table might be taken from process context or from
interrupt context. This may lead to a deadlock unless it is taken with
IRQs disabled.

Discovered by lockdep

================================
WARNING: inconsistent lock state
4.20.0-rc6
--------------------------------
inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W}

python/12572 [HC1[1]:SC0[0]:HE0:SE1] takes:
00000000052a4df4 (&(&table->lock)->rlock#2){?.+.}, /0x50 [mlx5_core]
{HARDIRQ-ON-W} state was registered at:
  _raw_spin_lock+0x33/0x70
  mlx5_get_rsc+0x1a/0x50 [mlx5_core]
  mlx5_ib_eqe_pf_action+0x493/0x1be0 [mlx5_ib]
  process_one_work+0x90c/0x1820
  worker_thread+0x87/0xbb0
  kthread+0x320/0x3e0
  ret_from_fork+0x24/0x30
irq event stamp: 103928
hardirqs last  enabled at (103927): [] nk+0x1a/0x1c
hardirqs last disabled at (103928): [] unk+0x1a/0x1c
softirqs last  enabled at (103924): [] tcp_sendmsg+0x31/0x40
softirqs last disabled at (103922): [] 80

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&table->lock)->rlock#2);

    lock(&(&table->lock)->rlock#2);

 *** DEADLOCK ***

Fixes: 032080ab43ac ("IB/mlx5: Lock QP during page fault handling")
Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/qp.c

index f33707ce8b6b0f26d9ace1d1f20e915e29f49913..479ac21cdbc6940701f1029ba31c699449293235 100644 (file)
@@ -44,14 +44,15 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev,
 {
        struct mlx5_qp_table *table = &dev->priv.qp_table;
        struct mlx5_core_rsc_common *common;
+       unsigned long flags;
 
-       spin_lock(&table->lock);
+       spin_lock_irqsave(&table->lock, flags);
 
        common = radix_tree_lookup(&table->tree, rsn);
        if (common)
                atomic_inc(&common->refcount);
 
-       spin_unlock(&table->lock);
+       spin_unlock_irqrestore(&table->lock, flags);
 
        if (!common) {
                mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n",