From cd866f5c4ce5a85ce7076f6beb9b32aacb25fb5a Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher
Date: Thu, 3 Aug 2023 15:45:45 +0200
Subject: [PATCH] s3:smbd: fix multichannel connection passing race

If a client opens multiple connections with the same client guid in
parallel, our connection passing is likely to hit a race.

Assume we have 3 processes:

smbdA: This process already handles all connections for a given
       client guid.

smbdB: This just received a new connection with an SMB2 negprot for
       the same client guid.

smbdC: This also received a new connection with an SMB2 negprot for
       the same client guid.

Now both smbdB and smbdC send a MSG_SMBXSRV_CONNECTION_PASS message
to smbdA. These messages contain the socket fd for each connection.

While waiting for a MSG_SMBXSRV_CONNECTION_PASSED message from smbdA,
both smbdB and smbdC watch the smbXcli_client.tdb record for changes
(that also verifies smbdA stays alive).

Once one of them, say smbdB, receives the MSG_SMBXSRV_CONNECTION_PASSED
message, the dbwrap_watch logic will wake up smbdC so that it rechecks
the smbXcli_client.tdb record, in order to handle the case where smbdA
died or deleted its record.

Now smbdC rechecks the smbXcli_client.tdb record, even though it was
not woken because of a problem with smbdA, and as a result it sends a
MSG_SMBXSRV_CONNECTION_PASS message including the socket fd again.

So smbdA receives the socket fd from smbdC twice (or even more often)
and creates two (or more) smbXsrv_connection structures for the same
low-level TCP connection. It also sends more than one SMB2 negprot
response. Depending on the tevent logic, it will use different
smbXsrv_connection structures to process incoming requests, which
almost immediately results in errors. The typical error is:

  smb2_validate_sequence_number: smb2_validate_sequence_number:
  bad message_id 2 (sequence id 2) (granted = 1, low = 1, range = 1)

But other errors are also possible.

The detail that leads to the long delays on the client side is that
our smbd_server_connection_terminate_ex() code closes only the fd of
a single smbXsrv_connection, so the refcount on the socket fd in the
kernel does not drop to 0 and the TCP connection stays alive...

Now we remember the server_id of the process we sent the
MSG_SMBXSRV_CONNECTION_PASS message to, and we keep watching the
smbXcli_client.tdb record as long as that server_id does not change;
we just need more patience while waiting for the
MSG_SMBXSRV_CONNECTION_PASSED message.
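The following self-contained C sketch is an editorial illustration of
the idea, not part of the change: struct server_id, server_id_equal()
and server_id_set_disconnected() are simplified stand-ins for Samba's
helpers of the same names, and pass_connection()/keep_watching() are
hypothetical placeholders for the messaging and dbwrap_watch
machinery.

/*
 * Editorial sketch of the guard added by this patch; simplified,
 * self-contained and compilable, but not the real Samba code.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct server_id {
	uint64_t pid;		/* UINT64_MAX means "disconnected" */
	uint64_t unique_id;
};

static void server_id_set_disconnected(struct server_id *id)
{
	id->pid = UINT64_MAX;
	id->unique_id = 0;
}

static bool server_id_equal(const struct server_id *a,
			    const struct server_id *b)
{
	return a->pid == b->pid && a->unique_id == b->unique_id;
}

struct negprot_state {
	struct server_id sent_server_id; /* who already got our socket fd */
};

/* Hypothetical stand-ins for the messaging and dbwrap_watch calls. */
static void pass_connection(const struct server_id *dst)
{
	printf("pass socket fd to pid %ju\n", (uintmax_t)dst->pid);
}

static void keep_watching(void)
{
	printf("keep watching smbXcli_client.tdb record\n");
}

/*
 * Called on every wakeup of the record watch.  Without the
 * sent_server_id check, a wakeup caused by *another* waiter
 * (smbdB being answered while we are smbdC) would re-send the
 * fd to the same process and duplicate the connection.
 */
static void negprot_next(struct negprot_state *state,
			 const struct server_id *record_owner)
{
	if (server_id_equal(&state->sent_server_id, record_owner)) {
		/* fd already passed to this process: just keep waiting */
		keep_watching();
		return;
	}

	/* reset first; the real code can error out before the send */
	server_id_set_disconnected(&state->sent_server_id);

	pass_connection(record_owner);
	state->sent_server_id = *record_owner;
	keep_watching();
}

int main(void)
{
	struct negprot_state state;
	struct server_id smbdA = { .pid = 4711, .unique_id = 1 };

	server_id_set_disconnected(&state.sent_server_id);

	negprot_next(&state, &smbdA);	/* first wakeup: fd is passed */
	negprot_next(&state, &smbdA);	/* spurious wakeup: only watches */
	return 0;
}

Compiled standalone, this prints one "pass socket fd" line followed by
two "keep watching" lines; without the server_id_equal() guard the
second (spurious) wakeup would pass the fd again, which is exactly the
duplication described above.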
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15346

Signed-off-by: Stefan Metzmacher
Reviewed-by: Andreas Schneider

Autobuild-User(master): Andreas Schneider
Autobuild-Date(master): Tue Aug 8 13:59:58 UTC 2023 on atb-devel-224

(cherry picked from commit f348b84fbcf203ab1ba92840cf7aecd55dbf9aa0)

Autobuild-User(v4-18-test): Jule Anger
Autobuild-Date(v4-18-test): Fri Aug 11 09:49:53 UTC 2023 on atb-devel-224
---
 selftest/knownfail.d/samba3.smb2.multichannel |  1 -
 source3/smbd/smbXsrv_client.c                 | 31 +++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)
 delete mode 100644 selftest/knownfail.d/samba3.smb2.multichannel

diff --git a/selftest/knownfail.d/samba3.smb2.multichannel b/selftest/knownfail.d/samba3.smb2.multichannel
deleted file mode 100644
index 26b80a39d5b..00000000000
--- a/selftest/knownfail.d/samba3.smb2.multichannel
+++ /dev/null
@@ -1 +0,0 @@
-^samba3.smb2.multichannel.bugs.bug_15346
diff --git a/source3/smbd/smbXsrv_client.c b/source3/smbd/smbXsrv_client.c
index 577131fc16f..928a9d72caa 100644
--- a/source3/smbd/smbXsrv_client.c
+++ b/source3/smbd/smbXsrv_client.c
@@ -488,6 +488,7 @@ struct smb2srv_client_mc_negprot_state {
 	struct tevent_context *ev;
 	struct smbd_smb2_request *smb2req;
 	struct db_record *db_rec;
+	struct server_id sent_server_id;
 	uint64_t watch_instance;
 	uint32_t last_seqnum;
 	struct tevent_req *filter_subreq;
@@ -530,6 +531,8 @@ struct tevent_req *smb2srv_client_mc_negprot_send(TALLOC_CTX *mem_ctx,
 
 	tevent_req_set_cleanup_fn(req, smb2srv_client_mc_negprot_cleanup);
 
+	server_id_set_disconnected(&state->sent_server_id);
+
 	smb2srv_client_mc_negprot_next(req);
 
 	if (!tevent_req_is_in_progress(req)) {
@@ -625,6 +628,30 @@ verify_again:
 		return;
 	}
 
+	if (server_id_equal(&state->sent_server_id, &global->server_id)) {
+		/*
+		 * We hit a race with other concurrent connections,
+		 * which have woken us.
+		 *
+		 * We already sent the pass or drop message to
+		 * the process, so we need to wait for a
+		 * response and not pass the connection
+		 * again! Otherwise the process would
+		 * receive the same tcp connection via
+		 * more than one file descriptor and
+		 * create more than one smbXsrv_connection
+		 * structure for the same tcp connection,
+		 * which means the client would see more
+		 * than one SMB2 negprot response to its
+		 * single SMB2 negprot request and we
+		 * as server get the session keys and
+		 * message id validation wrong
+		 */
+		goto watch_again;
+	}
+
+	server_id_set_disconnected(&state->sent_server_id);
+
 	/*
 	 * If last_server_id is set, we expect
 	 * smbXsrv_client_global_verify_record()
@@ -660,6 +687,7 @@ verify_again:
 			 */
 			goto verify_again;
 		}
+		state->sent_server_id = global->server_id;
 		if (tevent_req_nterror(req, status)) {
 			return;
 		}
@@ -674,11 +702,14 @@ verify_again:
 			 */
 			goto verify_again;
 		}
+		state->sent_server_id = global->server_id;
 		if (tevent_req_nterror(req, status)) {
 			return;
 		}
 	}
 
+watch_again:
+
 	/*
 	 * If the record changed, but we are not happy with the change yet,
 	 * we better remove ourself from the waiter list
-- 
2.47.3
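The "bad message_id" failure quoted in the commit message can also be
illustrated with a toy model of an SMB2 sequence window. This sketch
is not Samba's smb2_validate_sequence_number(); struct seq_window and
validate_message_id() are invented names, and the credit/window
bookkeeping is heavily simplified:

/*
 * Toy model of SMB2 message_id window validation, to illustrate the
 * reported failure mode; not Samba's smb2_validate_sequence_number().
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seq_window {
	uint64_t low;	/* lowest message_id still accepted */
	uint64_t range;	/* number of granted credits */
};

static bool validate_message_id(struct seq_window *win, uint64_t message_id)
{
	if (message_id < win->low || message_id >= win->low + win->range) {
		printf("bad message_id %ju (low = %ju, range = %ju)\n",
		       (uintmax_t)message_id,
		       (uintmax_t)win->low, (uintmax_t)win->range);
		return false;
	}
	win->low = message_id + 1;	/* simplified: window just advances */
	return true;
}

int main(void)
{
	/*
	 * Two smbXsrv_connection structures for one TCP connection mean
	 * two independent windows.  The second structure never saw the
	 * earlier message_ids the client already used, so message_id 2
	 * falls outside its window, matching the "bad message_id 2
	 * (granted = 1, low = 1, range = 1)" error from the log.
	 */
	struct seq_window first = { .low = 0, .range = 1 };
	struct seq_window second = { .low = 1, .range = 1 };

	validate_message_id(&first, 0);		/* ok on structure #1 */
	validate_message_id(&second, 2);	/* fails on structure #2 */
	return 0;
}

With two smbXsrv_connection structures accidentally created for one
TCP connection, each keeps an independent window, so a message_id that
is perfectly valid from the client's point of view lands outside the
window of whichever structure happens to process it.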