git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
smb: client: fix potential deadlock when reconnecting channels
author: Paulo Alcantara <pc@manguebit.org>
Wed, 25 Jun 2025 15:22:38 +0000 (12:22 -0300)
committer: Steve French <stfrench@microsoft.com>
Thu, 26 Jun 2025 16:12:04 +0000 (11:12 -0500)
Fix cifs_signal_cifsd_for_reconnect() to take the correct lock order
and prevent the following deadlock from happening:

======================================================
WARNING: possible circular locking dependency detected
6.16.0-rc3-build2+ #1301 Tainted: G S      W
------------------------------------------------------
cifsd/6055 is trying to acquire lock:
ffff88810ad56038 (&tcp_ses->srv_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x134/0x200

but task is already holding lock:
ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&ret_buf->chan_lock){+.+.}-{3:3}:
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_setup_session+0x81/0x4b0
       cifs_get_smb_ses+0x771/0x900
       cifs_mount_get_session+0x7e/0x170
       cifs_mount+0x92/0x2d0
       cifs_smb3_do_mount+0x161/0x460
       smb3_get_tree+0x55/0x90
       vfs_get_tree+0x46/0x180
       do_new_mount+0x1b0/0x2e0
       path_mount+0x6ee/0x740
       do_mount+0x98/0xe0
       __do_sys_mount+0x148/0x180
       do_syscall_64+0xa4/0x260
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

-> #1 (&ret_buf->ses_lock){+.+.}-{3:3}:
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_match_super+0x101/0x320
       sget+0xab/0x270
       cifs_smb3_do_mount+0x1e0/0x460
       smb3_get_tree+0x55/0x90
       vfs_get_tree+0x46/0x180
       do_new_mount+0x1b0/0x2e0
       path_mount+0x6ee/0x740
       do_mount+0x98/0xe0
       __do_sys_mount+0x148/0x180
       do_syscall_64+0xa4/0x260
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

-> #0 (&tcp_ses->srv_lock){+.+.}-{3:3}:
       check_noncircular+0x95/0xc0
       check_prev_add+0x115/0x2f0
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_signal_cifsd_for_reconnect+0x134/0x200
       __cifs_reconnect+0x8f/0x500
       cifs_handle_standard+0x112/0x280
       cifs_demultiplex_thread+0x64d/0xbc0
       kthread+0x2f7/0x310
       ret_from_fork+0x2a/0x230
       ret_from_fork_asm+0x1a/0x30

other info that might help us debug this:

Chain exists of:
  &tcp_ses->srv_lock --> &ret_buf->ses_lock --> &ret_buf->chan_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&ret_buf->chan_lock);
                               lock(&ret_buf->ses_lock);
                               lock(&ret_buf->chan_lock);
  lock(&tcp_ses->srv_lock);

 *** DEADLOCK ***

3 locks held by cifsd/6055:
 #0: ffffffff857de398 (&cifs_tcp_ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x7b/0x200
 #1: ffff888119c64060 (&ret_buf->ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x9c/0x200
 #2: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200

Cc: linux-cifs@vger.kernel.org
Reported-by: David Howells <dhowells@redhat.com>
Fixes: d7d7a66aacd6 ("cifs: avoid use of global locks for high contention data")
Reviewed-by: David Howells <dhowells@redhat.com>
Tested-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
fs/smb/client/cifsglob.h
fs/smb/client/connect.c

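diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 45e94e18f4d59a7cefad33b890865b0cd2c17b46..318a8405d475000226ee3207c83ca22d42526718 100644 (file)
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h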
@@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count)
 struct TCP_Server_Info {
        struct list_head tcp_ses_list;
        struct list_head smb_ses_list;
+       struct list_head rlist; /* reconnect list */
        spinlock_t srv_lock;  /* protect anything here that is not protected */
        __u64 conn_id; /* connection identifier (useful for debugging) */
        int srv_count; /* reference counter */
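diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index c48869c29e154ac97416c53f62bdeaa5b3b0495f..685c65dcb8c41f40f680016e60957504e51bac1b 100644 (file)
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c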
@@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work)
                           (SMB_INTERFACE_POLL_INTERVAL * HZ));
 }
 
+#define set_need_reco(server) \
+do { \
+       spin_lock(&server->srv_lock); \
+       if (server->tcpStatus != CifsExiting) \
+               server->tcpStatus = CifsNeedReconnect; \
+       spin_unlock(&server->srv_lock); \
+} while (0)
+
 /*
  * Update the tcpStatus for the server.
  * This is used to signal the cifsd thread to call cifs_reconnect
@@ -137,39 +145,45 @@ void
 cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
                                bool all_channels)
 {
-       struct TCP_Server_Info *pserver;
+       struct TCP_Server_Info *nserver;
        struct cifs_ses *ses;
+       LIST_HEAD(reco);
        int i;
 
-       /* If server is a channel, select the primary channel */
-       pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;
-
        /* if we need to signal just this channel */
        if (!all_channels) {
-               spin_lock(&server->srv_lock);
-               if (server->tcpStatus != CifsExiting)
-                       server->tcpStatus = CifsNeedReconnect;
-               spin_unlock(&server->srv_lock);
+               set_need_reco(server);
                return;
        }
 
-       spin_lock(&cifs_tcp_ses_lock);
-       list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
-               if (cifs_ses_exiting(ses))
-                       continue;
-               spin_lock(&ses->chan_lock);
-               for (i = 0; i < ses->chan_count; i++) {
-                       if (!ses->chans[i].server)
+       if (SERVER_IS_CHAN(server))
+               server = server->primary_server;
+       scoped_guard(spinlock, &cifs_tcp_ses_lock) {
+               set_need_reco(server);
+               list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+                       spin_lock(&ses->ses_lock);
+                       if (ses->ses_status == SES_EXITING) {
+                               spin_unlock(&ses->ses_lock);
                                continue;
-
-                       spin_lock(&ses->chans[i].server->srv_lock);
-                       if (ses->chans[i].server->tcpStatus != CifsExiting)
-                               ses->chans[i].server->tcpStatus = CifsNeedReconnect;
-                       spin_unlock(&ses->chans[i].server->srv_lock);
+                       }
+                       spin_lock(&ses->chan_lock);
+                       for (i = 1; i < ses->chan_count; i++) {
+                               nserver = ses->chans[i].server;
+                               if (!nserver)
+                                       continue;
+                               nserver->srv_count++;
+                               list_add(&nserver->rlist, &reco);
+                       }
+                       spin_unlock(&ses->chan_lock);
+                       spin_unlock(&ses->ses_lock);
                }
-               spin_unlock(&ses->chan_lock);
        }
-       spin_unlock(&cifs_tcp_ses_lock);
+
+       list_for_each_entry_safe(server, nserver, &reco, rlist) {
+               list_del_init(&server->rlist);
+               set_need_reco(server);
+               cifs_put_tcp_session(server, 0);
+       }
 }
 
 /*