]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
nvme-tcp: lockdep: use dynamic lockdep keys per socket instance
author: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Thu, 4 Jun 2026 02:32:08 +0000 (11:32 +0900)
committer: Keith Busch <kbusch@kernel.org>
Thu, 4 Jun 2026 08:19:08 +0000 (01:19 -0700)
When NVMe-TCP controller setup and teardown are repeated with lockdep
enabled, lockdep reports a false-positive WARN involving the following locks:

  1) &q->elevator_lock        : IO scheduler change context
  2) &q->q_usage_counter(io)  : SCSI disk probe context
  3) fs_reclaim               : CPU hotplug bring-up context
  4) cpu_hotplug_lock         : socket establishment context
  5) sk_lock-AF_INET-NVME     : MQ sched dispatch context for the socket
  6) set->srcu                : NVMe controller delete context

The lockdep WARN was observed by running blktests test case nvme/005 for
the tcp transport on a v7.1-rc1 kernel with a patch applied. Refer to the
Link tag for the details of the WARN.

This is a false positive because lockdep confuses lock 4) (socket
establishment) with lock 5) (socket in use) for different socket
instances. The locks belong to different sockets, but lockdep treats
them as the same due to shared static lockdep keys.

Fix this by using dynamically allocated lockdep keys per socket instance
instead of static keys nvme_tcp_sk_key[] and nvme_tcp_slock_key[]. Add
nvme_tcp_sk_key and nvme_tcp_slock_key fields to struct nvme_tcp_queue
and pass them to sock_lock_init_class_and_name() for proper lockdep
tracking. Change the argument of nvme_tcp_reclassify_socket() from
'struct socket *' to 'struct nvme_tcp_queue *' to pass both the socket
and the keys. Add CONFIG_DEBUG_LOCK_ALLOC guards to nvme_tcp_alloc_queue()
and nvme_tcp_free_queue() to register and unregister the dynamic keys.
Additionally, move nvme_tcp_reclassify_socket() inside these guards since
it's only needed when lockdep is enabled.

Link: https://lore.kernel.org/linux-nvme/afB5syZbUrppgsDQ@shinmob/
Suggested-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
drivers/nvme/host/tcp.c

index 353ac6ce9fbdf9aa63516ea8dacfbfd972aa6bac..9d17c88a620054077def63f2f6e9610a41957f6b 100644 (file)
@@ -142,6 +142,11 @@ struct nvme_tcp_queue {
        void (*state_change)(struct sock *);
        void (*data_ready)(struct sock *);
        void (*write_space)(struct sock *);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       struct lock_class_key nvme_tcp_sk_key;
+       struct lock_class_key nvme_tcp_slock_key;
+#endif
 };
 
 struct nvme_tcp_ctrl {
@@ -176,12 +181,9 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
  * a separate class prevents lockdep from conflating nvme-tcp socket use with
  * user-space socket API use.
  */
-static struct lock_class_key nvme_tcp_sk_key[2];
-static struct lock_class_key nvme_tcp_slock_key[2];
-
-static void nvme_tcp_reclassify_socket(struct socket *sock)
+static void nvme_tcp_reclassify_socket(struct nvme_tcp_queue *queue)
 {
-       struct sock *sk = sock->sk;
+       struct sock *sk = queue->sock->sk;
 
        if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
                return;
@@ -189,22 +191,20 @@ static void nvme_tcp_reclassify_socket(struct socket *sock)
        switch (sk->sk_family) {
        case AF_INET:
                sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME",
-                                             &nvme_tcp_slock_key[0],
+                                             &queue->nvme_tcp_slock_key,
                                              "sk_lock-AF_INET-NVME",
-                                             &nvme_tcp_sk_key[0]);
+                                             &queue->nvme_tcp_sk_key);
                break;
        case AF_INET6:
                sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME",
-                                             &nvme_tcp_slock_key[1],
+                                             &queue->nvme_tcp_slock_key,
                                              "sk_lock-AF_INET6-NVME",
-                                             &nvme_tcp_sk_key[1]);
+                                             &queue->nvme_tcp_sk_key);
                break;
        default:
                WARN_ON_ONCE(1);
        }
 }
-#else
-static void nvme_tcp_reclassify_socket(struct socket *sock) { }
 #endif
 
 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
@@ -1468,6 +1468,11 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
        kfree(queue->pdu);
        mutex_destroy(&queue->send_mutex);
        mutex_destroy(&queue->queue_lock);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       lockdep_unregister_key(&queue->nvme_tcp_sk_key);
+       lockdep_unregister_key(&queue->nvme_tcp_slock_key);
+#endif
 }
 
 static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
@@ -1813,7 +1818,12 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
        }
 
        sk_net_refcnt_upgrade(queue->sock->sk);
-       nvme_tcp_reclassify_socket(queue->sock);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       lockdep_register_key(&queue->nvme_tcp_sk_key);
+       lockdep_register_key(&queue->nvme_tcp_slock_key);
+       nvme_tcp_reclassify_socket(queue);
+#endif
 
        /* Single syn retry */
        tcp_sock_set_syncnt(queue->sock->sk, 1);
@@ -1918,6 +1928,10 @@ err_sock:
        /* Use sync variant - see nvme_tcp_free_queue() for explanation */
        __fput_sync(queue->sock->file);
        queue->sock = NULL;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       lockdep_unregister_key(&queue->nvme_tcp_sk_key);
+       lockdep_unregister_key(&queue->nvme_tcp_slock_key);
+#endif
 err_destroy_mutex:
        mutex_destroy(&queue->send_mutex);
        mutex_destroy(&queue->queue_lock);