#define VM_TAP_HASH_KEY SD_ID128_MAKE(01,d0,c6,4c,2b,df,24,fb,c0,f8,b2,09,7d,59,b2,93)
-#define DISK_SERIAL_MAX_LEN_SCSI 30
-#define DISK_SERIAL_MAX_LEN_NVME 20
+#define DISK_SERIAL_MAX_LEN_SCSI 30
+#define DISK_SERIAL_MAX_LEN_NVME 20
+#define DISK_SERIAL_MAX_LEN_VIRTIO_BLK 20
/* Spare pcie-root-ports reserved for future runtime hotplug beyond the pre-wired devices. */
#define VMSPAWN_PCIE_HOTPLUG_SPARES 10u
"mode", "control");
}
+static int resolve_disk_driver(DiskType dt, const char *filename, DriveInfo *info) {
+ int r;
+
+ assert(filename);
+ assert(info);
+
+ switch (dt) {
+ case DISK_TYPE_VIRTIO_BLK:
+ info->disk_driver = "virtio-blk-pci";
+ r = disk_serial(filename, DISK_SERIAL_MAX_LEN_VIRTIO_BLK, &info->serial);
+ if (r < 0)
+ return r;
+ break;
+ case DISK_TYPE_VIRTIO_SCSI:
+ info->disk_driver = "scsi-hd";
+ r = disk_serial(filename, DISK_SERIAL_MAX_LEN_SCSI, &info->serial);
+ if (r < 0)
+ return r;
+ break;
+ case DISK_TYPE_NVME:
+ info->disk_driver = "nvme";
+ r = disk_serial(filename, DISK_SERIAL_MAX_LEN_NVME, &info->serial);
+ if (r < 0)
+ return r;
+ break;
+ case DISK_TYPE_VIRTIO_SCSI_CDROM:
+ info->disk_driver = "scsi-cd";
+ info->flags |= QMP_DRIVE_READ_ONLY;
+ r = disk_serial(filename, DISK_SERIAL_MAX_LEN_SCSI, &info->serial);
+ if (r < 0)
+ return r;
+ break;
+ default:
+ assert_not_reached();
+ }
+
+ return 0;
+}
+
+static int prepare_primary_drive(const char *runtime_dir, DriveInfos *drives) {
+ int r;
+
+ assert(runtime_dir);
+ assert(drives);
+
+ if (!arg_image)
+ return 0;
+
+ _cleanup_free_ char *image_fn = NULL;
+ r = path_extract_filename(arg_image, &image_fn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract filename from path '%s': %m", arg_image);
+
+ DriveInfo *d = &drives->drives[drives->n_drives++];
+ *d = (DriveInfo) { .fd = -EBADF, .overlay_fd = -EBADF };
+
+ r = resolve_disk_driver(arg_image_disk_type, image_fn, d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve disk driver for '%s': %m", image_fn);
+
+ int open_flags = ((arg_ephemeral || FLAGS_SET(d->flags, QMP_DRIVE_READ_ONLY)) ? O_RDONLY : O_RDWR) | O_CLOEXEC | O_NOCTTY;
+
+ _cleanup_close_ int image_fd = open(arg_image, open_flags);
+ if (image_fd < 0)
+ return log_error_errno(errno, "Failed to open '%s': %m", arg_image);
+
+ struct stat st;
+ if (fstat(image_fd, &st) < 0)
+ return log_error_errno(errno, "Failed to stat '%s': %m", arg_image);
+
+ r = stat_verify_regular_or_block(&st);
+ if (r < 0)
+ return log_error_errno(r, "Expected regular file or block device for image: %s", arg_image);
+
+ d->path = arg_image;
+ d->format = image_format_to_string(arg_image_format);
+ d->node_name = strdup("vmspawn");
+ if (!d->node_name)
+ return log_oom();
+ d->fd = TAKE_FD(image_fd);
+ if (S_ISBLK(st.st_mode))
+ d->flags |= QMP_DRIVE_BLOCK_DEVICE;
+ if (arg_discard_disk && !FLAGS_SET(d->flags, QMP_DRIVE_READ_ONLY))
+ d->flags |= QMP_DRIVE_DISCARD;
+ d->flags |= QMP_DRIVE_BOOT;
+
+ /* For ephemeral mode, create an anonymous overlay file. QEMU will format it
+ * as qcow2 via blockdev-create, so no filesystem path is needed.
+ * Skip for read-only drives (e.g. CDROM) where overlays are not meaningful. */
+ if (arg_ephemeral && !FLAGS_SET(d->flags, QMP_DRIVE_READ_ONLY)) {
+ _cleanup_close_ int overlay_fd = open(runtime_dir, O_TMPFILE | O_RDWR | O_CLOEXEC, 0600);
+ if (overlay_fd < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(errno))
+ return log_error_errno(errno, "Failed to create ephemeral overlay in '%s': %m", runtime_dir);
+
+ /* Fallback to memfd if O_TMPFILE is not supported */
+ overlay_fd = memfd_new("vmspawn-overlay");
+ if (overlay_fd < 0)
+ return log_error_errno(overlay_fd, "Failed to create ephemeral overlay via memfd: %m");
+ }
+ d->overlay_fd = TAKE_FD(overlay_fd);
+ d->flags |= QMP_DRIVE_NO_FLUSH;
+ }
+
+ return 0;
+}
+
+static int prepare_extra_drives(DriveInfos *drives) {
+ int r;
+
+ assert(drives);
+
+ size_t extra_idx = 0;
+ FOREACH_ARRAY(drive, arg_extra_drives.drives, arg_extra_drives.n_drives) {
+ _cleanup_free_ char *drive_fn = NULL;
+ r = path_extract_filename(drive->path, &drive_fn);
+ if (r < 0)
+ return log_error_errno(r, "Failed to extract filename from path '%s': %m", drive->path);
+
+ DiskType dt = drive->disk_type >= 0 ? drive->disk_type : arg_image_disk_type;
+
+ DriveInfo *d = &drives->drives[drives->n_drives++];
+ *d = (DriveInfo) { .fd = -EBADF, .overlay_fd = -EBADF };
+
+ r = resolve_disk_driver(dt, drive_fn, d);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve disk driver for '%s': %m", drive_fn);
+
+ _cleanup_close_ int drive_fd = open(drive->path, (FLAGS_SET(d->flags, QMP_DRIVE_READ_ONLY) ? O_RDONLY : O_RDWR) | O_CLOEXEC | O_NOCTTY);
+ if (drive_fd < 0)
+ return log_error_errno(errno, "Failed to open '%s': %m", drive->path);
+
+ struct stat drive_st;
+ if (fstat(drive_fd, &drive_st) < 0)
+ return log_error_errno(errno, "Failed to stat '%s': %m", drive->path);
+ r = stat_verify_regular_or_block(&drive_st);
+ if (r < 0)
+ return log_error_errno(r, "Expected regular file or block device, not '%s'.", drive->path);
+ if (S_ISBLK(drive_st.st_mode) && drive->format == IMAGE_FORMAT_QCOW2)
+ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Block device '%s' cannot be used with 'qcow2' format, only 'raw' is supported.",
+ drive->path);
+
+ d->path = drive->path;
+ d->format = image_format_to_string(drive->format);
+ d->fd = TAKE_FD(drive_fd);
+ if (S_ISBLK(drive_st.st_mode))
+ d->flags |= QMP_DRIVE_BLOCK_DEVICE;
+ d->flags |= QMP_DRIVE_NO_FLUSH;
+
+ if (asprintf(&d->node_name, "vmspawn_extra_%zu", extra_idx++) < 0)
+ return log_oom();
+ }
+
+ return 0;
+}
+
+/* Assign PCIe root port names to devices. The ports were pre-allocated in the config
+ * file. Each PCI device that will be hotplugged via QMP device_add gets a port. */
+static int assign_pcie_ports(MachineConfig *c) {
+ assert(c);
+
+ if (!ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS)
+ return 0;
+
+ DriveInfos *drives = &c->drives;
+ NetworkInfo *network = &c->network;
+ VirtiofsInfos *virtiofs = &c->virtiofs;
+ VsockInfo *vsock = &c->vsock;
+
+ size_t port = 0;
+
+ /* Drives: non-SCSI drives get individual ports, SCSI controller gets one port */
+ bool need_scsi = false;
+ FOREACH_ARRAY(d, drives->drives, drives->n_drives) {
+ if (STR_IN_SET(d->disk_driver, "scsi-hd", "scsi-cd")) {
+ need_scsi = true;
+ continue;
+ }
+ if (asprintf(&d->pcie_port, "vmspawn-pcieport-%zu", port++) < 0)
+ return log_oom();
+ }
+ if (need_scsi)
+ if (asprintf(&drives->scsi_pcie_port, "vmspawn-pcieport-%zu", port++) < 0)
+ return log_oom();
+
+ if (network->type)
+ if (asprintf(&network->pcie_port, "vmspawn-pcieport-%zu", port++) < 0)
+ return log_oom();
+
+ FOREACH_ARRAY(v, virtiofs->entries, virtiofs->n_entries)
+ if (asprintf(&v->pcie_port, "vmspawn-pcieport-%zu", port++) < 0)
+ return log_oom();
+
+ if (vsock->fd >= 0)
+ if (asprintf(&vsock->pcie_port, "vmspawn-pcieport-%zu", port++) < 0)
+ return log_oom();
+
+ return 0;
+}
+
+static int prepare_device_info(const char *runtime_dir, MachineConfig *c) {
+ int r;
+
+ assert(runtime_dir);
+ assert(c);
+
+ DriveInfos *drives = &c->drives;
+
+ /* Build drive info for QMP-based setup. vmspawn opens all image files and
+ * passes fds to QEMU via add-fd — QEMU never needs filesystem access. */
+ drives->drives = new0(DriveInfo, 1 + arg_extra_drives.n_drives);
+ if (!drives->drives)
+ return log_oom();
+
+ r = prepare_primary_drive(runtime_dir, drives);
+ if (r < 0)
+ return r;
+
+ r = prepare_extra_drives(drives);
+ if (r < 0)
+ return r;
+
+ return assign_pcie_ports(c);
+}
+
static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
_cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
_cleanup_free_ char *qemu_binary = NULL, *mem = NULL;
}
}
- bool need_scsi_controller =
- IN_SET(arg_image_disk_type, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_VIRTIO_SCSI_CDROM) && arg_image;
- if (!need_scsi_controller)
- FOREACH_ARRAY(drive, arg_extra_drives.drives, arg_extra_drives.n_drives) {
- DiskType dt = drive->disk_type >= 0 ? drive->disk_type : arg_image_disk_type;
- if (IN_SET(dt, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_VIRTIO_SCSI_CDROM)) {
- need_scsi_controller = true;
- break;
- }
- }
-
- if (need_scsi_controller) {
- r = qemu_config_section(config_file, "device", "vmspawn_scsi",
- "driver", "virtio-scsi-pci");
- if (r < 0)
- return r;
- }
-
if (arg_image) {
assert(!arg_directory);
arg_image);
}
- if (arg_image_disk_type == DISK_TYPE_VIRTIO_SCSI_CDROM)
- r = qemu_config_section(config_file, "drive", "vmspawn",
- "if", "none",
- "file", arg_image,
- "format", image_format_to_string(arg_image_format),
- "media", "cdrom",
- "readonly", "on");
- else
- r = qemu_config_section(config_file, "drive", "vmspawn",
- "if", "none",
- "file", arg_image,
- "format", image_format_to_string(arg_image_format),
- "discard", on_off(arg_discard_disk),
- "snapshot", on_off(arg_ephemeral));
- if (r < 0)
- return r;
-
- _cleanup_free_ char *image_fn = NULL;
- r = path_extract_filename(arg_image, &image_fn);
- if (r < 0)
- return log_error_errno(r, "Failed to extract filename from path '%s': %m", image_fn);
-
- const char *disk_driver;
- _cleanup_free_ char *serial = NULL;
-
- switch (arg_image_disk_type) {
- case DISK_TYPE_VIRTIO_BLK:
- disk_driver = "virtio-blk-pci";
- serial = strdup(image_fn);
- if (!serial)
- return log_oom();
- break;
- case DISK_TYPE_VIRTIO_SCSI:
- disk_driver = "scsi-hd";
- r = disk_serial(image_fn, DISK_SERIAL_MAX_LEN_SCSI, &serial);
- if (r < 0)
- return log_oom();
- break;
- case DISK_TYPE_NVME:
- disk_driver = "nvme";
- r = disk_serial(image_fn, DISK_SERIAL_MAX_LEN_NVME, &serial);
- if (r < 0)
- return log_oom();
- break;
- case DISK_TYPE_VIRTIO_SCSI_CDROM:
- disk_driver = "scsi-cd";
- r = disk_serial(image_fn, DISK_SERIAL_MAX_LEN_SCSI, &serial);
- if (r < 0)
- return log_oom();
- break;
- default:
- assert_not_reached();
- }
-
- r = qemu_config_section(config_file, "device", "vmspawn-disk",
- "driver", disk_driver,
- "drive", "vmspawn",
- "bootindex", "1",
- "serial", serial);
- if (r < 0)
- return r;
-
- if (IN_SET(arg_image_disk_type, DISK_TYPE_VIRTIO_SCSI, DISK_TYPE_VIRTIO_SCSI_CDROM)) {
- r = qemu_config_key(config_file, "bus", "vmspawn_scsi.0");
- if (r < 0)
- return r;
- }
-
if (arg_image_disk_type != DISK_TYPE_VIRTIO_SCSI_CDROM) {
r = grow_image(arg_image, arg_grow_image);
if (r < 0)
return log_oom();
}
- size_t i = 0;
- FOREACH_ARRAY(drive, arg_extra_drives.drives, arg_extra_drives.n_drives) {
- if (strv_extend(&cmdline, "-blockdev") < 0)
- return log_oom();
-
- _cleanup_free_ char *escaped_drive = escape_qemu_value(drive->path);
- if (!escaped_drive)
- return log_oom();
-
- struct stat st;
- if (stat(drive->path, &st) < 0)
- return log_error_errno(errno, "Failed to stat '%s': %m", drive->path);
-
- const char *driver = NULL;
- if (S_ISREG(st.st_mode))
- driver = "file";
- else if (S_ISBLK(st.st_mode)) {
- if (drive->format == IMAGE_FORMAT_QCOW2)
- return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
- "Block device '%s' cannot be used with 'qcow2' format, only 'raw' is supported.",
- drive->path);
- driver = "host_device";
- } else
- return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expected regular file or block device, not '%s'.", drive->path);
-
- DiskType dt = drive->disk_type >= 0 ? drive->disk_type : arg_image_disk_type;
-
- if (strv_extendf(&cmdline, "driver=%s,cache.direct=off,cache.no-flush=on,file.driver=%s,file.filename=%s,node-name=vmspawn_extra_%zu%s",
- image_format_to_string(drive->format), driver, escaped_drive, i,
- dt == DISK_TYPE_VIRTIO_SCSI_CDROM ? ",read-only=on" : "") < 0)
- return log_oom();
-
- _cleanup_free_ char *drive_fn = NULL;
- r = path_extract_filename(drive->path, &drive_fn);
- if (r < 0)
- return log_error_errno(r, "Failed to extract filename from path '%s': %m", drive->path);
-
- _cleanup_free_ char *escaped_drive_fn = escape_qemu_value(drive_fn);
- if (!escaped_drive_fn)
- return log_oom();
-
- if (strv_extend(&cmdline, "-device") < 0)
- return log_oom();
-
- switch (dt) {
- case DISK_TYPE_VIRTIO_BLK:
- if (strv_extendf(&cmdline, "virtio-blk-pci,drive=vmspawn_extra_%zu,serial=%s", i++, escaped_drive_fn) < 0)
- return log_oom();
- break;
- case DISK_TYPE_VIRTIO_SCSI: {
- _cleanup_free_ char *serial = NULL;
- r = disk_serial(escaped_drive_fn, DISK_SERIAL_MAX_LEN_SCSI, &serial);
- if (r < 0)
- return log_oom();
- if (strv_extendf(&cmdline, "scsi-hd,bus=vmspawn_scsi.0,drive=vmspawn_extra_%zu,serial=%s", i++, serial) < 0)
- return log_oom();
- break;
- }
- case DISK_TYPE_NVME: {
- _cleanup_free_ char *serial = NULL;
- r = disk_serial(escaped_drive_fn, DISK_SERIAL_MAX_LEN_NVME, &serial);
- if (r < 0)
- return log_oom();
- if (strv_extendf(&cmdline, "nvme,drive=vmspawn_extra_%zu,serial=%s", i++, serial) < 0)
- return log_oom();
- break;
- }
- case DISK_TYPE_VIRTIO_SCSI_CDROM: {
- _cleanup_free_ char *serial = NULL;
- r = disk_serial(escaped_drive_fn, DISK_SERIAL_MAX_LEN_SCSI, &serial);
- if (r < 0)
- return log_oom();
- if (strv_extendf(&cmdline, "scsi-cd,bus=vmspawn_scsi.0,drive=vmspawn_extra_%zu,serial=%s", i++, serial) < 0)
- return log_oom();
- break;
- }
- default:
- assert_not_reached();
- }
- }
+ /* Extra drive validation is done in the post-fork drive info construction loop
+ * to avoid stat()'ing each drive twice. */
if (!IN_SET(arg_console_mode, CONSOLE_GUI, CONSOLE_HEADLESS)) {
r = strv_prepend(&arg_kernel_cmdline_extra,
n_pcie_ports);
size_t n_builtin_ports = n_pcie_ports - VMSPAWN_PCIE_HOTPLUG_SPARES;
- for (i = 0; i < n_pcie_ports; i++) {
+ for (size_t i = 0; i < n_pcie_ports; i++) {
char id[STRLEN("vmspawn-hotplug-pci-root-port-") + DECIMAL_STR_MAX(size_t)];
if (i < n_builtin_ports)
xsprintf(id, "vmspawn-pcieport-%zu", i);
return r;
}
}
+
/* Finalize the config file and add -readconfig to the cmdline */
r = fflush_and_check(config_file);
if (r < 0)
child_pty = safe_close(child_pty);
bridge_fds[1] = safe_close(bridge_fds[1]);
+ r = prepare_device_info(runtime_dir, &config);
+ if (r < 0)
+ return r;
+
/* Connect to VMM backend */
_cleanup_(vmspawn_qmp_bridge_freep) VmspawnQmpBridge *bridge = NULL;
r = vmspawn_qmp_init(&bridge, bridge_fds[0], event);
return r;
/* Device setup — all before resuming vCPUs */
+ r = vmspawn_qmp_setup_drives(bridge, &config.drives);
+ if (r < 0)
+ return r;
+
if (config.network.type) {
r = vmspawn_qmp_setup_network(bridge, &config.network);
if (r < 0)