]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
ssh-proxy: expect OK PORT response from vsock-mux
authorMiao Wang <shankerwangmiao@gmail.com>
Thu, 13 Nov 2025 19:49:15 +0000 (03:49 +0800)
committerLennart Poettering <lennart@poettering.net>
Thu, 18 Dec 2025 20:15:41 +0000 (21:15 +0100)
The unix-domain socket to AF_VSOCK multiplexers in Firecracker and
vhost-device-vsock send an OK PORT response to the client, which causes
ssh clients to abort the connection when they see the additional response.
This patch addresses the issue by waiting for the possible OK PORT
response from the multiplexer, if any, and consuming it before handing the
socket fd over to the ssh client. For simplicity, it only checks that the
response begins with OK and consumes the response up to and including the
first \n.

Signed-off-by: Miao Wang <shankerwangmiao@gmail.com>
src/ssh-generator/ssh-proxy.c

index 30d5a7eed7012f6705c4b5fc86774cc7ad6c1813..f3810388efe64946b3297414619b544d42b1a0a9 100644 (file)
@@ -14,6 +14,9 @@
 #include "socket-util.h"
 #include "string-util.h"
 #include "strv.h"
+#include "time-util.h"
+
+#define HEADER_READ_TIMEOUT_USEC (5 * USEC_PER_SEC)
 
 static int process_vsock_cid(unsigned cid, const char *port) {
         int r;
@@ -91,6 +94,125 @@ static int process_unix(const char *path) {
         return 0;
 }
 
+static int skip_ok_port_res(int fd, const char *path, const char *port) { /* Skip an optional OK PORT reply readable on fd; returns 0 once fd may be passed on. path and port are only used in log messages. */
+        struct timeval oldtv;
+        socklen_t oldtv_size = sizeof(oldtv);
+        if (getsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &oldtv, &oldtv_size) < 0) /* remember the current receive timeout; it is restored at passout_fd */
+                return log_error_errno(errno, "Failed to get socket receive timeout for %s: %m", path);
+        if (oldtv_size != sizeof(oldtv))
+                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Unexpected size of socket receive timeout for %s", path);
+        if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, TIMEVAL_STORE(HEADER_READ_TIMEOUT_USEC), sizeof(struct timeval)) < 0) /* apply HEADER_READ_TIMEOUT_USEC as the receive timeout for the recv() calls below */
+                return log_error_errno(errno, "Failed to set socket receive timeout for %s: %m", path);
+
+        char recv_buf[STRLEN("OK 65535\n")]; /* capacity is the length of "OK 65535\n" (assuming STRLEN() excludes the NUL) */
+        size_t bytes_recv = 0, bytes_avail = 0, pos = 0; /* bytes_recv: bytes consumed from the socket; bytes_avail: valid bytes in recv_buf (consumed + peeked); pos: next byte to inspect */
+        static const char expected_prefix[] = "OK ";
+
+        for (;;) {
+                if (pos >= bytes_avail) { /* everything buffered so far has been inspected: fetch more */
+                        assert(bytes_recv <= bytes_avail);
+                        if (bytes_avail >= sizeof(recv_buf)) {
+                                /*
+                                  A full buffer means that we have peeked as many bytes as possible and have not seen the terminating \n.
+                                  So the server is believed to not send OK PORT response, and we just pass out the socket to ssh client,
+                                  and let it handle the connection.
+
+                                  If we have not received any bytes from the socket buffer, we can safely pass out the socket,
+                                  since no change has been made to the socket buffer. Otherwise, if some bytes have been received,
+                                  the socket buffer has been changed, the only option is to give up and terminate the connection.
+                                  Similar logic applies below when we meet other kinds of unexpected responses.
+                                */
+                                if (bytes_recv == 0) {
+                                        log_debug("Received too many bytes while waiting for OK PORT response from %s\n"
+                                                  "Assume the multiplexer is not sending OK PORT.",
+                                                  path);
+                                        goto passout_fd;
+                                }
+                                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Received too many bytes while waiting for OK PORT response from %s", path);
+                        }
+                        if (bytes_avail > bytes_recv) {
+                                /*
+                                  Discard already peeked bytes before peeking more.
+
+                                  XXX: We cannot use SO_RCVLOWAT to set the minimum number of bytes to be peeked to peek entire
+                                       OK PORT response at once to prevent changes to the receiving buffer, because SO_RCVLOWAT
+                                       does not work on unix sockets with recv(..., MSG_PEEK). Also poll() does not help here,
+                                       because poll() returns readable as long as there is any data in the socket buffer for
+                                       unix sockets, not respecting SO_RCVLOWAT.
+
+                                  XXX: We could have used SO_PEEK_OFF to continuously peek more data without changing the socket
+                                       receive buffer, but this function has been broken since Linux 4.3 due to a kernel bug, which is fixed
+                                       in Linux 6.18 commit 7bf3a476ce43 ("af_unix: Read sk_peek_offset() again after sleeping in
+                                       unix_stream_read_generic()."). It is also not possible to detect whether the kernel is
+                                       affected by this bug at runtime.
+
+                                  As a result, we have no other choice but to discard already peeked data here.
+                                */
+                                ssize_t rlen = recv(fd, recv_buf + bytes_recv, bytes_avail - bytes_recv, /* flags= */ 0);
+                                if (rlen < 0)
+                                        return log_error_errno(errno, "Failed to discard OK PORT response from %s: %m", path);
+                                if ((size_t) rlen != bytes_avail - bytes_recv)
+                                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while discarding OK PORT response from %s", path);
+                                log_debug("Successfully discarded %zi bytes of response: %.*s", rlen, (int) rlen, recv_buf + bytes_recv);
+                                bytes_recv = bytes_avail; /* from here on the socket buffer has been modified, so passing fd out untouched is no longer possible */
+                        }
+                        ssize_t len = recv(fd, recv_buf + bytes_avail, sizeof(recv_buf) - bytes_avail, MSG_PEEK); /* peek only: the newly read bytes stay in the socket buffer */
+                        if (len < 0) {
+                                if (errno != EAGAIN) /* EAGAIN is handled below as a receive timeout */
+                                        return log_error_errno(errno, "Failed to receive OK from %s: %m", path);
+                                if (bytes_recv == 0) {
+                                        log_debug("Timeout while waiting for OK PORT response from %s\n"
+                                                  "Assume the multiplexer will not send OK PORT.",
+                                                  path);
+                                        goto passout_fd;
+                                }
+                                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Timed out to receive OK PORT from %s", path);
+
+                        }
+                        if (len == 0) { /* recv() returned no data: treated as the peer having closed the connection */
+                                log_debug("Connection closed while waiting for OK PORT response from %s", path);
+                                if (bytes_recv == 0) {
+                                        log_debug("No data received, which means the connecting port is not open.");
+                                        return log_error_errno(SYNTHETIC_ERRNO(ECONNREFUSED), "Port %s on %s is not open", port, path);
+                                }
+                                return log_error_errno(SYNTHETIC_ERRNO(EIO), "Connection closed before full OK PORT response received from %s.", path);
+                        }
+                        bytes_avail += len;
+                }
+                assert(pos < bytes_avail);
+                if (pos < strlen(expected_prefix) && recv_buf[pos] != expected_prefix[pos]) { /* only the first strlen("OK ") bytes are compared; the rest of the line is not validated */
+                        if (bytes_recv == 0) {
+                                log_debug("Received response does not start with expected OK PORT response from %s\n"
+                                          "Assume the multiplexer will not send OK PORT.",
+                                          path);
+                                goto passout_fd;
+                        }
+                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Received invalid response while waiting for OK PORT from %s", path);
+                }
+                if (recv_buf[pos] == '\n') { /* end of the response line: pos is advanced past the \n before leaving the loop */
+                        pos += 1;
+                        break;
+                }
+                pos += 1;
+        }
+
+        assert(pos <= sizeof(recv_buf));
+        assert(bytes_recv <= pos);
+        if (bytes_recv < pos) { /* consume the part of the line that has only been peeked so far; bytes after the \n are left in the socket */
+                ssize_t len = recv(fd, recv_buf + bytes_recv, pos - bytes_recv, /* flags= */ 0);
+                if (len < 0)
+                        return log_error_errno(errno, "Failed to discard OK PORT response from %s: %m", path);
+                if ((size_t) len != pos - bytes_recv)
+                        return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while discarding OK PORT response from %s", path);
+                log_debug("Successfully discarded response from %s: %.*s", path, (int) pos, recv_buf);
+        }
+
+passout_fd: /* reached on every path that returns 0: put back the receive timeout saved at the top */
+        if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &oldtv, sizeof(oldtv)) < 0)
+                return log_error_errno(errno, "Failed to restore socket receive timeout for %s: %m", path);
+        return 0;
+}
+
 static int process_vsock_mux(const char *path, const char *port) {
         int r;
 
@@ -127,6 +249,10 @@ static int process_vsock_mux(const char *path, const char *port) {
         if (r < 0)
                 return log_error_errno(r, "Failed to send CONNECT to %s:%s: %m", path, port);
 
+        r = skip_ok_port_res(fd, path, port);
+        if (r < 0)
+                return r;
+
         r = send_one_fd_iov(STDOUT_FILENO, fd, &iovec_nul_byte, /* iovlen= */ 1, /* flags= */ 0);
         if (r < 0)
                 return log_error_errno(r, "Failed to send socket via STDOUT: %m");