From: Christian Brauner Date: Wed, 8 Apr 2026 07:02:26 +0000 (+0200) Subject: vmspawn: pre-allocate PCIe root ports for device hotplug X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6bfa7aa0509a3d612ffef5d56b7663454765e8ac;p=thirdparty%2Fsystemd.git vmspawn: pre-allocate PCIe root ports for device hotplug On PCIe machine types (q35, virt), QMP device_add is always hotplug — even with vCPUs stopped. The root PCIe bus (pcie.0) does not support hotplugging; only pcie-root-port bridges do. Pre-allocate enough root ports in the QEMU config file for all devices that will be set up via QMP, plus 10 spare ports for future runtime hotplug. Add ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS macro to guard PCIe-specific setup on x86, ARM, RISC-V, and LoongArch (the architectures whose QEMU machine type is q35 or virt). Signed-off-by: Christian Brauner (Amutable) --- diff --git a/src/vmspawn/vmspawn-util.h b/src/vmspawn/vmspawn-util.h index 4e3e4c13132..38bb331dfc3 100644 --- a/src/vmspawn/vmspawn-util.h +++ b/src/vmspawn/vmspawn-util.h @@ -59,20 +59,33 @@ # define KERNEL_CMDLINE_SIZE 512 #endif +/* ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS is co-located with QEMU_MACHINE_TYPE so they stay in + * sync: q35 and virt machine types need pcie-root-port bridges for QMP device_add hotplug. + * Exception: m68k's "virt" uses virtio-mmio, not PCIe, so it doesn't need root ports. */ #if defined(__x86_64__) || defined(__i386__) # define QEMU_MACHINE_TYPE "q35" -#elif defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch64) || defined(__m68k__) +# define ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS 1 +#elif defined(__m68k__) # define QEMU_MACHINE_TYPE "virt" +# define ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS 0 +#elif defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch64) +# define QEMU_MACHINE_TYPE "virt" +# define ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS 1 #elif defined(__s390__) || defined(__s390x__) # define QEMU_MACHINE_TYPE "s390-ccw-virtio" +# define ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS 0 #elif defined(__powerpc__) || defined(__powerpc64__) # define QEMU_MACHINE_TYPE "pseries" +# define ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS 0 #elif defined(__mips__) # define QEMU_MACHINE_TYPE "malta" +# define ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS 0 #elif defined(__sparc__) # define QEMU_MACHINE_TYPE "sun4u" +# define ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS 0 #else # define QEMU_MACHINE_TYPE "none" +# define ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS 0 #endif #if defined(__arm__) || defined(__aarch64__) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index fe7d307a637..587f2da800b 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -100,6 +100,9 @@ #define DISK_SERIAL_MAX_LEN_SCSI 30 #define DISK_SERIAL_MAX_LEN_NVME 20 +/* Spare pcie-root-ports reserved for future runtime hotplug beyond the pre-wired devices. */ +#define VMSPAWN_PCIE_HOTPLUG_SPARES 10u + /* An enum controlling how auxiliary state for the VM are maintained, i.e. the TPM state and the EFI variable * NVRAM. */ typedef enum StateMode { @@ -3443,6 +3446,56 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; + + /* Pre-allocate PCIe root ports for QMP device_add hotplug. On PCIe machine types + * (q35, virt), QMP device_add is always hotplug — the root bus (pcie.0) does not support + * it. Each root port provides one slot for hotplug. We create enough ports for all devices + * that will be set up via QMP, plus VMSPAWN_PCIE_HOTPLUG_SPARES spare ports for future + * runtime hotplug. */ + if (ARCHITECTURE_NEEDS_PCIE_ROOT_PORTS) { + /* Count maximum possible PCI devices: root image + extra drives + SCSI controller + + * network + virtiofs mounts + vsock. The actual count may be lower (e.g. no network, + * no SCSI), but unused ports have negligible overhead. */ + size_t n_pcie_ports = 1 + + arg_extra_drives.n_drives + /* drives */ + 1 + /* SCSI controller */ + 1 + /* network */ + (arg_directory ? 1 : 0) + /* rootdir virtiofs */ + arg_runtime_mounts.n_mounts + /* extra virtiofs mounts */ + 1 + /* vsock */ + VMSPAWN_PCIE_HOTPLUG_SPARES; /* reserved for future hotplug */ + + /* Guard the unsigned subtraction below against future refactors that might drop the + * fixed additions. */ + assert(n_pcie_ports >= VMSPAWN_PCIE_HOTPLUG_SPARES); + + /* QEMU's pcie-root-port chassis/slot are uint8_t — i+1 must fit. */ + if (n_pcie_ports > UINT8_MAX) + return log_error_errno(SYNTHETIC_ERRNO(E2BIG), + "Too many PCIe root ports requested (%zu, max 255). " + "Reduce the number of extra drives or runtime mounts.", + n_pcie_ports); + + size_t n_builtin_ports = n_pcie_ports - VMSPAWN_PCIE_HOTPLUG_SPARES; + for (i = 0; i < n_pcie_ports; i++) { + char id[STRLEN("vmspawn-hotplug-pci-root-port-") + DECIMAL_STR_MAX(size_t)]; + if (i < n_builtin_ports) + xsprintf(id, "vmspawn-pcieport-%zu", i); + else + xsprintf(id, "vmspawn-hotplug-pci-root-port-%zu", i - n_builtin_ports); + + r = qemu_config_section(config_file, "device", id, + "driver", "pcie-root-port"); + if (r < 0) + return r; + r = qemu_config_keyf(config_file, "chassis", "%zu", i + 1); + if (r < 0) + return r; + r = qemu_config_keyf(config_file, "slot", "%zu", i + 1); + if (r < 0) + return r; + } + } /* Finalize the config file and add -readconfig to the cmdline */ r = fflush_and_check(config_file); if (r < 0)