return find_executable(qemu_arch_specific, ret_qemu_binary);
}
-int vsock_fix_child_cid(unsigned *machine_cid, const char *machine, int *ret_child_sock) {
+int vsock_fix_child_cid(int vhost_device_fd, unsigned *machine_cid, const char *machine) {
/* this is an arbitrary value picked from /dev/urandom */
static const uint8_t sip_key[HASH_KEY_SIZE] = {
0x03, 0xad, 0xf0, 0xa4,
0xf5, 0x4c, 0x80, 0x52
};
struct siphash machine_hash_state, state;
- _cleanup_close_ int vfd = -EBADF;
int r;
/* uint64_t is required here for the ioctl call, but valid CIDs are only 32 bits */
uint64_t cid = *ASSERT_PTR(machine_cid);
assert(machine);
- assert(ret_child_sock);
+ assert(vhost_device_fd >= 0);
/* Fix the CID of the AF_VSOCK socket passed to qemu
*
* If after another 64 attempts this hasn't worked then give up and return EADDRNOTAVAIL.
*/
- /* remove O_CLOEXEC before this fd is passed to QEMU */
- vfd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
- if (vfd < 0)
- return log_debug_errno(errno, "Failed to open /dev/vhost-vsock as read/write: %m");
-
if (cid != VMADDR_CID_ANY) {
- r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
+ r = ioctl(vhost_device_fd, VHOST_VSOCK_SET_GUEST_CID, &cid);
if (r < 0)
return log_debug_errno(errno, "Failed to set CID for child vsock with user provided CID %" PRIu64 ": %m", cid);
- *ret_child_sock = TAKE_FD(vfd);
return 0;
}
uint64_t hash = siphash24_finalize(&state);
cid = 3 + (hash % (UINT_MAX - 4));
- r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
+ r = ioctl(vhost_device_fd, VHOST_VSOCK_SET_GUEST_CID, &cid);
if (r >= 0) {
*machine_cid = cid;
- *ret_child_sock = TAKE_FD(vfd);
return 0;
}
if (errno != EADDRINUSE)
for (unsigned i = 0; i < 64; i++) {
cid = 3 + random_u64_range(UINT_MAX - 4);
- r = ioctl(vfd, VHOST_VSOCK_SET_GUEST_CID, &cid);
+ r = ioctl(vhost_device_fd, VHOST_VSOCK_SET_GUEST_CID, &cid);
if (r >= 0) {
*machine_cid = cid;
- *ret_child_sock = TAKE_FD(vfd);
return 0;
}
#include <string.h>
#include <unistd.h>
+#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"
return 0;
}
-static int run_virtual_machine(void) {
+static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
_cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
- _cleanup_close_ int vsock_fd = -EBADF;
- _cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL;
+ _cleanup_free_ char *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL;
+ _cleanup_close_ int notify_sock_fd = -EBADF;
_cleanup_strv_free_ char **cmdline = NULL;
+ _cleanup_free_ int *pass_fds = NULL;
+ size_t n_pass_fds = 0;
+ const char *machine, *accel;
int r;
if (arg_privileged)
log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
"falling back to OVMF firmware blobs without Secure Boot support.");
- const char *accel = use_kvm ? "kvm" : "tcg";
- if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
- machine = strjoin("type=virt,accel=", accel);
- else
- machine = strjoin("type=q35,accel=", accel, ",smm=", on_off(ovmf_config->supports_sb));
- if (!machine)
- return log_oom();
-
r = find_qemu_binary(&qemu_binary);
if (r == -EOPNOTSUPP)
return log_error_errno(r, "Native architecture is not supported by qemu.");
if (r < 0)
return log_error_errno(r, "Failed to find QEMU binary: %m");
+ if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
+ machine = "type=virt";
+ else
+ machine = ovmf_config->supports_sb ? "type=q35,smm=on" : "type=q35,smm=off";
+
if (asprintf(&mem, "%" PRIu64, DIV_ROUND_UP(arg_qemu_mem, U64_MB)) < 0)
return log_oom();
if (r < 0)
return log_oom();
+ if (!use_kvm && kvm_device_fd >= 0) {
+ log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
+ kvm_device_fd = safe_close(kvm_device_fd);
+ }
+
+ if (use_kvm && kvm_device_fd >= 0) {
+ /* /dev/fdset/1 is a magic path that tells qemu to use the fd registered in fd set 1 (see
+ * --add-fd below) as /dev/kvm. This lets us take an fd for /dev/kvm and hand it over to qemu. */
+ accel = "kvm,device=/dev/fdset/1";
+
+ r = strv_extend(&cmdline, "--add-fd");
+ if (r < 0)
+ return log_oom();
+
+ r = strv_extendf(&cmdline, "fd=%d,set=1,opaque=/dev/kvm", kvm_device_fd);
+ if (r < 0)
+ return log_oom();
+
+ if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
+ return log_oom();
+
+ pass_fds[n_pass_fds++] = kvm_device_fd;
+ } else if (use_kvm)
+ accel = "kvm";
+ else
+ accel = "tcg";
+
+ r = strv_extend_many(&cmdline, "-accel", accel);
+ if (r < 0)
+ return log_oom();
+
bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
if (arg_qemu_vsock < 0) {
r = qemu_check_vsock_support();
use_vsock = r;
}
- unsigned child_cid = VMADDR_CID_ANY;
_cleanup_close_ int child_vsock_fd = -EBADF;
if (use_vsock) {
- child_cid = arg_vsock_cid;
+ int device_fd = vhost_device_fd;
+ unsigned child_cid = arg_vsock_cid;
+
+ if (device_fd < 0) {
+ child_vsock_fd = open("/dev/vhost-vsock", O_RDWR|O_CLOEXEC);
+ if (child_vsock_fd < 0)
+ return log_error_errno(errno, "Failed to open /dev/vhost-vsock as read/write: %m");
+
+ device_fd = child_vsock_fd;
+ }
- r = vsock_fix_child_cid(&child_cid, arg_machine, &child_vsock_fd);
+ r = vsock_fix_child_cid(device_fd, &child_cid, arg_machine);
if (r < 0)
return log_error_errno(r, "Failed to fix CID for the guest vsock socket: %m");
if (r < 0)
return log_oom();
- r = strv_extendf(&cmdline, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, child_vsock_fd);
+ r = strv_extendf(&cmdline, "vhost-vsock-pci,guest-cid=%u,vhostfd=%d", child_cid, device_fd);
if (r < 0)
return log_oom();
+
+ if (!GREEDY_REALLOC(pass_fds, n_pass_fds + 1))
+ return log_oom();
+
+ pass_fds[n_pass_fds++] = device_fd;
}
r = strv_extend_many(&cmdline, "-cpu", "max");
}
if (use_vsock) {
- vsock_fd = open_vsock();
- if (vsock_fd < 0)
- return log_error_errno(vsock_fd, "Failed to open vsock: %m");
+ notify_sock_fd = open_vsock();
+ if (notify_sock_fd < 0)
+ return log_error_errno(notify_sock_fd, "Failed to open vsock: %m");
- r = cmdline_add_vsock(&cmdline, vsock_fd);
+ r = cmdline_add_vsock(&cmdline, notify_sock_fd);
if (r == -ENOMEM)
return log_oom();
if (r < 0)
int exit_status = INT_MAX;
if (use_vsock) {
- r = setup_notify_parent(event, vsock_fd, &exit_status, &notify_event_source);
+ r = setup_notify_parent(event, notify_sock_fd, &exit_status, &notify_event_source);
if (r < 0)
return log_error_errno(r, "Failed to setup event loop to handle vsock notify events: %m");
}
}
static int run(int argc, char *argv[]) {
- int r;
+ int r, kvm_device_fd = -EBADF, vhost_device_fd = -EBADF;
+ _cleanup_strv_free_ char **names = NULL;
log_setup();
special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
}
+ r = sd_listen_fds_with_names(true, &names);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get passed file descriptors: %m");
+
+ for (int i = 0; i < r; i++) {
+ int fd = SD_LISTEN_FDS_START + i;
+ if (streq(names[i], "kvm"))
+ kvm_device_fd = fd;
+ else if (streq(names[i], "vhost-vsock"))
+ vhost_device_fd = fd;
+ else {
+ log_notice("Couldn't recognize passed fd %d (%s), closing fd and ignoring...", fd, names[i]);
+ safe_close(fd);
+ }
+ }
+
assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
- return run_virtual_machine();
+ return run_virtual_machine(kvm_device_fd, vhost_device_fd);
}
DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);