From: Daan De Meyer Date: Fri, 27 Mar 2026 13:26:16 +0000 (+0100) Subject: vmspawn: Support direct kernel boot without UEFI firmware X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=bb93fdaaf3b3e47360d8f136e7137cdb352dd9ed;p=thirdparty%2Fsystemd.git vmspawn: Support direct kernel boot without UEFI firmware When --linux= specifies a non-PE kernel image, automatically disable UEFI firmware loading (as if --firmware=none was passed). If --firmware=uefi or a firmware definition path is explicitly set in this case, fail with an error. Booting a UKI with --firmware=bios or --firmware=none is also rejected since UKIs require UEFI. --firmware=none can also be used explicitly to disable firmware loading for PE kernels; an empty --firmware= resets the option to its default (auto). Other changes: - Extract OVMF pflash drive setup into cmdline_add_ovmf() - Extract kernel image type detection into determine_kernel() - Add smbios_supported() helper to centralize the SMBIOS availability check (always available on x86, elsewhere requires firmware) - Gate SMM, OVMF drives, SMBIOS11 and credential SMBIOS paths on firmware/SMBIOS being available - Beef up the credential logic to fall back to fw_cfg and kernel command line in case SMBIOS is not available --- diff --git a/man/systemd-vmspawn.xml b/man/systemd-vmspawn.xml index 92872233aee..6ae4bd304a0 100644 --- a/man/systemd-vmspawn.xml +++ b/man/systemd-vmspawn.xml @@ -330,12 +330,19 @@ - Takes an absolute path, or a relative path beginning with - ./. Specifies a JSON firmware definition file, which allows selecting the - firmware to boot in the VM. If not specified, a suitable firmware is automatically discovered. If the - special string list is specified lists all discovered firmwares. If the special - string describe is specified, the firmware that would be selected (taking - --firmware-features= into account) is printed and the program exits. + Selects which firmware to use in the VM. Takes one of auto, + uefi, bios, none, an absolute path, or a + relative path beginning with ./. 
Defaults to auto, which + selects UEFI firmware unless --linux= specifies a non-PE kernel image, in which + case none is selected. uefi loads OVMF firmware (use a path + to a JSON firmware definition file to select a specific one). bios skips OVMF + loading and lets QEMU use its built-in BIOS (e.g. SeaBIOS on x86). none disables + firmware loading entirely and requires --linux= to be specified for direct kernel + boot. Booting a UKI requires uefi. If the special string list + is specified, all discovered firmware definition files are listed. If the special string + describe is specified, the UEFI firmware that would be selected (taking + --firmware-features= into account) is printed and the program exits. If an empty + string is specified, the option is reset to its default. @@ -761,6 +768,11 @@ embed a NUL byte). Note that the invoking shell might already apply unescaping once, hence this might require double escaping! + Credentials are preferably passed to the VM via SMBIOS Type 11 strings or QEMU fw_cfg files. + If neither mechanism is available, credentials are passed on the kernel command line using + systemd.set_credential_binary= which is not a confidential channel. Do not use + this for passing secrets to the VM in that case. 
+ diff --git a/src/vmspawn/vmspawn-settings.c b/src/vmspawn/vmspawn-settings.c index 56a07b3f6f0..9382172e2ca 100644 --- a/src/vmspawn/vmspawn-settings.c +++ b/src/vmspawn/vmspawn-settings.c @@ -44,3 +44,11 @@ static const char *const console_transport_table[_CONSOLE_TRANSPORT_MAX] = { }; DEFINE_STRING_TABLE_LOOKUP(console_transport, ConsoleTransport); + +static const char *const firmware_table[_FIRMWARE_MAX] = { + [FIRMWARE_UEFI] = "uefi", + [FIRMWARE_BIOS] = "bios", + [FIRMWARE_NONE] = "none", +}; + +DEFINE_STRING_TABLE_LOOKUP(firmware, Firmware); diff --git a/src/vmspawn/vmspawn-settings.h b/src/vmspawn/vmspawn-settings.h index 83d28725359..f02b499201e 100644 --- a/src/vmspawn/vmspawn-settings.h +++ b/src/vmspawn/vmspawn-settings.h @@ -49,6 +49,14 @@ typedef enum ConsoleTransport { _CONSOLE_TRANSPORT_INVALID = -EINVAL, } ConsoleTransport; +typedef enum Firmware { + FIRMWARE_UEFI, /* load OVMF firmware */ + FIRMWARE_BIOS, /* don't load OVMF, let qemu use its built-in BIOS (e.g. SeaBIOS on x86) */ + FIRMWARE_NONE, /* no firmware at all, requires --linux= for direct kernel boot */ + _FIRMWARE_MAX, + _FIRMWARE_INVALID = -EINVAL, +} Firmware; + typedef enum SettingsMask { SETTING_START_MODE = UINT64_C(1) << 0, SETTING_MACHINE_ID = UINT64_C(1) << 6, @@ -62,4 +70,5 @@ typedef enum SettingsMask { DECLARE_STRING_TABLE_LOOKUP(console_mode, ConsoleMode); DECLARE_STRING_TABLE_LOOKUP(console_transport, ConsoleTransport); DECLARE_STRING_TABLE_LOOKUP(disk_type, DiskType); +DECLARE_STRING_TABLE_LOOKUP(firmware, Firmware); DECLARE_STRING_TABLE_LOOKUP(image_format, ImageFormat); diff --git a/src/vmspawn/vmspawn-util.h b/src/vmspawn/vmspawn-util.h index d9272b49000..4e3e4c13132 100644 --- a/src/vmspawn/vmspawn-util.h +++ b/src/vmspawn/vmspawn-util.h @@ -39,6 +39,26 @@ # define ARCHITECTURE_SUPPORTS_CXL 0 #endif +#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch64) +# define 
ARCHITECTURE_SUPPORTS_FW_CFG 1 +#else +# define ARCHITECTURE_SUPPORTS_FW_CFG 0 +#endif + +/* QEMU's fw_cfg file path buffer is FW_CFG_MAX_FILE_PATH (56) bytes including NUL */ +#define QEMU_FW_CFG_MAX_KEY_LEN 55 + +/* These match the kernel's COMMAND_LINE_SIZE for each architecture */ +#if defined(__loongarch64) +# define KERNEL_CMDLINE_SIZE 4096 +#elif defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) +# define KERNEL_CMDLINE_SIZE 2048 +#elif defined(__arm__) || defined(__riscv) +# define KERNEL_CMDLINE_SIZE 1024 +#else +# define KERNEL_CMDLINE_SIZE 512 +#endif + #if defined(__x86_64__) || defined(__i386__) # define QEMU_MACHINE_TYPE "q35" #elif defined(__arm__) || defined(__aarch64__) || defined(__riscv) || defined(__loongarch64) || defined(__m68k__) diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index 93c8fbbbbd2..5065f387440 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -138,6 +138,7 @@ static int arg_vsock = -1; static unsigned arg_vsock_cid = VMADDR_CID_ANY; static int arg_tpm = -1; static char *arg_linux = NULL; +static KernelImageType arg_linux_image_type = _KERNEL_IMAGE_TYPE_INVALID; static char **arg_initrds = NULL; static ConsoleMode arg_console_mode = CONSOLE_INTERACTIVE; static ConsoleTransport arg_console_transport = CONSOLE_TRANSPORT_VIRTIO; @@ -146,6 +147,7 @@ static MachineCredentialContext arg_credentials = {}; static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U; static RuntimeMountContext arg_runtime_mounts = {}; static char *arg_firmware = NULL; +static Firmware arg_firmware_type = _FIRMWARE_INVALID; static bool arg_firmware_describe = false; static Set *arg_firmware_features_include = NULL; static Set *arg_firmware_features_exclude = NULL; @@ -542,8 +544,15 @@ static int parse_argv(int argc, char *argv[]) { break; } - OPTION_LONG("firmware", "PATH|list|describe", - "Select firmware definition file (or list/describe available)"): + OPTION_LONG("firmware", 
"auto|uefi|bios|none|PATH|list|describe", + "Select firmware to use, or a firmware definition file (or list/describe available)"): { + if (isempty(arg) || streq(arg, "auto")) { + arg_firmware = mfree(arg_firmware); + arg_firmware_type = _FIRMWARE_INVALID; + arg_firmware_describe = false; + break; + } + if (streq(arg, "list")) { _cleanup_strv_free_ char **l = NULL; @@ -563,19 +572,33 @@ static int parse_argv(int argc, char *argv[]) { /* Handled after argument parsing so that --firmware-features= is * taken into account. */ arg_firmware = mfree(arg_firmware); + /* We only look for UEFI firmware when "describe" is specified. */ + arg_firmware_type = FIRMWARE_UEFI; arg_firmware_describe = true; break; } - arg_firmware_describe = false; + Firmware f = firmware_from_string(arg); + if (f >= 0) { + arg_firmware = mfree(arg_firmware); + arg_firmware_type = f; + arg_firmware_describe = false; + break; + } - if (!isempty(arg) && !path_is_absolute(arg) && !startswith(arg, "./")) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Absolute path or path starting with './' required."); + if (!path_is_absolute(arg) && !startswith(arg, "./")) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Expected one of 'auto', 'uefi', 'bios', 'none', 'list', 'describe', or an absolute path or path starting with './', got: %s", + arg); r = parse_path_argument(arg, /* suppress_root= */ false, &arg_firmware); if (r < 0) return r; + + arg_firmware_type = FIRMWARE_UEFI; + arg_firmware_describe = false; break; + } OPTION_LONG("firmware-features", "FEATURE,...|list", "Require/exclude specific firmware features"): { @@ -1265,16 +1288,16 @@ static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata return 0; } -static int cmdline_add_vsock(char ***cmdline, int vsock_fd) { - int r; +static bool smbios_supported(void) { + /* SMBIOS is always available on x86 (via SeaBIOS fallback), but on + * other architectures it requires UEFI firmware to be loaded. 
*/ + return ARCHITECTURE_SUPPORTS_SMBIOS && + (IN_SET(native_architecture(), ARCHITECTURE_X86, ARCHITECTURE_X86_64) || arg_firmware_type == FIRMWARE_UEFI); +} - assert(cmdline); +static int add_vsock_credential(int vsock_fd) { assert(vsock_fd >= 0); - r = strv_extend(cmdline, "-smbios"); - if (r < 0) - return r; - union sockaddr_union addr; socklen_t addr_len = sizeof addr.vm; if (getsockname(vsock_fd, &addr.sa, &addr_len) < 0) @@ -1283,54 +1306,57 @@ static int cmdline_add_vsock(char ***cmdline, int vsock_fd) { assert(addr_len >= sizeof addr.vm); assert(addr.vm.svm_family == AF_VSOCK); - r = strv_extendf(cmdline, "type=11,value=io.systemd.credential:vmm.notify_socket=vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port); - if (r < 0) - return r; + _cleanup_free_ char *value = NULL; + if (asprintf(&value, "vsock-stream:%u:%u", (unsigned) VMADDR_CID_HOST, addr.vm.svm_port) < 0) + return -ENOMEM; - return 0; + return machine_credential_add(&arg_credentials, "vmm.notify_socket", value, SIZE_MAX); } -static int cmdline_add_kernel_cmdline(char ***cmdline, const char *kernel, const char *smbios_dir) { +static int cmdline_add_kernel_cmdline(char ***cmdline, int smbios_dir_fd, const char *smbios_dir) { int r; assert(cmdline); + assert(smbios_dir_fd >= 0); assert(smbios_dir); if (strv_isempty(arg_kernel_cmdline_extra)) return 0; - KernelImageType type = _KERNEL_IMAGE_TYPE_INVALID; - if (kernel) { - r = inspect_kernel(AT_FDCWD, kernel, &type); - if (r < 0) - return log_error_errno(r, "Failed to determine '%s' kernel image type: %m", kernel); - } - _cleanup_free_ char *kcl = strv_join(arg_kernel_cmdline_extra, " "); if (!kcl) return log_oom(); - if (kernel && type != KERNEL_IMAGE_TYPE_UKI) { + size_t kcl_len = strlen(kcl); + if (kcl_len >= KERNEL_CMDLINE_SIZE) + return log_error_errno(SYNTHETIC_ERRNO(E2BIG), + "Kernel command line length (%zu) exceeds the kernel's COMMAND_LINE_SIZE (%d).", + kcl_len, KERNEL_CMDLINE_SIZE); + + if (arg_linux_image_type >= 0 && 
arg_linux_image_type != KERNEL_IMAGE_TYPE_UKI) { if (strv_extend_many(cmdline, "-append", kcl) < 0) return log_oom(); } else { - if (!ARCHITECTURE_SUPPORTS_SMBIOS) { + if (!smbios_supported()) { log_warning("Cannot append extra args to kernel cmdline, native architecture doesn't support SMBIOS, ignoring."); return 0; } FOREACH_STRING(id, "io.systemd.stub.kernel-cmdline-extra", "io.systemd.boot.kernel-cmdline-extra") { - _cleanup_free_ char *p = path_join(smbios_dir, id); - if (!p) + _cleanup_free_ char *content = strjoin(id, "=", kcl); + if (!content) return log_oom(); - r = write_string_filef( - p, - WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE|WRITE_STRING_FILE_MODE_0600, - "%s=%s", id, kcl); + r = write_string_file_at( + smbios_dir_fd, id, content, + WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE|WRITE_STRING_FILE_MODE_0600); if (r < 0) return log_error_errno(r, "Failed to write smbios kernel command line to file: %m"); + _cleanup_free_ char *p = path_join(smbios_dir, id); + if (!p) + return log_oom(); + if (strv_extend(cmdline, "-smbios") < 0) return log_oom(); @@ -1342,15 +1368,112 @@ static int cmdline_add_kernel_cmdline(char ***cmdline, const char *kernel, const return 0; } -static int cmdline_add_smbios11(char ***cmdline, const char* smbios_dir) { +static int cmdline_add_credentials(char ***cmdline, int smbios_dir_fd, const char *smbios_dir) { + int r; + + assert(cmdline); + assert(smbios_dir_fd >= 0); + assert(smbios_dir); + + FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) { + _cleanup_free_ char *cred_data_b64 = NULL; + ssize_t n; + + n = base64mem(cred->data, cred->size, &cred_data_b64); + if (n < 0) + return log_oom(); + + if (smbios_supported()) { + _cleanup_free_ char *content = NULL; + if (asprintf(&content, "io.systemd.credential.binary:%s=%s", cred->id, cred_data_b64) < 0) + return log_oom(); + + r = write_string_file_at( + smbios_dir_fd, cred->id, content, + 
WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE|WRITE_STRING_FILE_MODE_0600); + if (r < 0) + return log_error_errno(r, "Failed to write smbios credential file: %m"); + + _cleanup_free_ char *p = path_join(smbios_dir, cred->id); + if (!p) + return log_oom(); + + if (strv_extend(cmdline, "-smbios") < 0) + return log_oom(); + + if (strv_extend_joined(cmdline, "type=11,path=", p) < 0) + return log_oom(); + + } else if (ARCHITECTURE_SUPPORTS_FW_CFG) { + /* fw_cfg keys are limited to 55 characters */ + _cleanup_free_ char *key = strjoin("opt/io.systemd.credentials/", cred->id); + if (!key) + return log_oom(); + + if (strlen(key) <= QEMU_FW_CFG_MAX_KEY_LEN) { + r = write_data_file_atomic_at( + smbios_dir_fd, cred->id, + &IOVEC_MAKE(cred->data, cred->size), + WRITE_DATA_FILE_MODE_0400); + if (r < 0) + return log_error_errno(r, "Failed to write fw_cfg credential file: %m"); + + _cleanup_free_ char *p = path_join(smbios_dir, cred->id); + if (!p) + return log_oom(); + + if (strv_extend(cmdline, "-fw_cfg") < 0) + return log_oom(); + + if (strv_extendf(cmdline, "name=%s,file=%s", key, p) < 0) + return log_oom(); + + continue; + } + + /* Fall through to kernel command line if key is too long */ + log_notice("fw_cfg key '%s' exceeds %d character limit, passing credential via kernel command line. " + "Note that this will make literal credentials readable to unprivileged userspace.", + key, QEMU_FW_CFG_MAX_KEY_LEN); + + if (arg_linux_image_type < 0) + return log_error_errno( + SYNTHETIC_ERRNO(E2BIG), + "Cannot pass credential '%s' to VM, fw_cfg key exceeds %d character limit and no kernel for direct boot specified.", + cred->id, + QEMU_FW_CFG_MAX_KEY_LEN); + + if (strv_extendf(&arg_kernel_cmdline_extra, + "systemd.set_credential_binary=%s:%s", cred->id, cred_data_b64) < 0) + return log_oom(); + + } else if (arg_linux_image_type >= 0) { + log_notice("Both SMBIOS and fw_cfg are not supported, passing credential via kernel command line. 
" + "Note that this will make literal credentials readable to unprivileged userspace."); + if (strv_extendf(&arg_kernel_cmdline_extra, + "systemd.set_credential_binary=%s:%s", cred->id, cred_data_b64) < 0) + return log_oom(); + } else + return log_error_errno( + SYNTHETIC_ERRNO(EOPNOTSUPP), + "Cannot pass credential '%s' to VM, native architecture doesn't support SMBIOS or fw_cfg and no kernel for direct boot specified.", + cred->id); + } + + return 0; +} + +static int cmdline_add_smbios11(char ***cmdline, int smbios_dir_fd, const char *smbios_dir) { int r; assert(cmdline); + assert(smbios_dir_fd >= 0); + assert(smbios_dir); if (strv_isempty(arg_smbios11)) return 0; - if (!ARCHITECTURE_SUPPORTS_SMBIOS) { + if (!smbios_supported()) { log_warning("Cannot issue SMBIOS Type #11 strings, native architecture doesn't support SMBIOS, ignoring."); return 0; } @@ -1362,8 +1485,13 @@ static int cmdline_add_smbios11(char ***cmdline, const char* smbios_dir) { if (r < 0) return r; - r = write_string_file( - p, *i, + _cleanup_free_ char *fn = NULL; + r = path_extract_filename(p, &fn); + if (r < 0) + return r; + + r = write_string_file_at( + smbios_dir_fd, fn, *i, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE|WRITE_STRING_FILE_MODE_0600); if (r < 0) return log_error_errno(r, "Failed to write smbios data to smbios file %s: %m", p); @@ -2057,9 +2185,117 @@ static int disk_serial(const char *filename, size_t max_len, char **ret) { return 0; } +static int cmdline_add_ovmf(FILE *config_file, const OvmfConfig *ovmf_config, char **ret_ovmf_vars) { + int r; + + assert(config_file); + assert(ret_ovmf_vars); + + if (!ovmf_config) { + *ret_ovmf_vars = NULL; + return 0; + } + + r = qemu_config_section(config_file, "drive", "ovmf-code", + "if", "pflash", + "format", ovmf_config_format(ovmf_config), + "readonly", "on", + "file", ovmf_config->path); + if (r < 0) + return r; + + if (!ovmf_config->vars && !arg_efi_nvram_template) { + *ret_ovmf_vars = NULL; + return 0; + } + + if 
(arg_efi_nvram_state_mode == STATE_AUTO && !arg_ephemeral) { + assert(!arg_efi_nvram_state_path); + + r = make_sidecar_path(".efinvramstate", &arg_efi_nvram_state_path); + if (r < 0) + return r; + + log_debug("Storing EFI NVRAM state persistently under '%s'.", arg_efi_nvram_state_path); + } + + const char *vars_source = arg_efi_nvram_template ?: ovmf_config->vars; + _cleanup_close_ int target_fd = -EBADF; + _cleanup_(unlink_and_freep) char *destroy_path = NULL; + bool newly_created; + const char *state; + if (arg_efi_nvram_state_path) { + _cleanup_free_ char *d = strdup(arg_efi_nvram_state_path); + if (!d) + return log_oom(); + + target_fd = openat_report_new(AT_FDCWD, arg_efi_nvram_state_path, O_WRONLY|O_CREAT|O_CLOEXEC, 0600, &newly_created); + if (target_fd < 0) + return log_error_errno(target_fd, "Failed to open file for OVMF vars at %s: %m", arg_efi_nvram_state_path); + + if (newly_created) + destroy_path = TAKE_PTR(d); + + r = fd_verify_regular(target_fd); + if (r < 0) + return log_error_errno(r, "Not a regular file for OVMF variables at %s: %m", arg_efi_nvram_state_path); + + state = arg_efi_nvram_state_path; + } else { + _cleanup_free_ char *t = NULL; + r = tempfn_random_child(/* p= */ NULL, "vmspawn-", &t); + if (r < 0) + return log_error_errno(r, "Failed to create temporary filename: %m"); + + target_fd = open(t, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600); + if (target_fd < 0) + return log_error_errno(errno, "Failed to create regular file for OVMF vars at %s: %m", t); + + newly_created = true; + state = *ret_ovmf_vars = TAKE_PTR(t); + } + + if (newly_created) { + _cleanup_close_ int source_fd = open(vars_source, O_RDONLY|O_CLOEXEC); + if (source_fd < 0) + return log_error_errno(errno, "Failed to open OVMF vars file %s: %m", vars_source); + + r = copy_bytes(source_fd, target_fd, UINT64_MAX, COPY_REFLINK); + if (r < 0) + return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", vars_source, state); + + /* This isn't always available so don't raise 
an error if it fails */ + (void) copy_times(source_fd, target_fd, 0); + } + + destroy_path = mfree(destroy_path); /* disarm auto-destroy */ + + /* Mark the UEFI variable store pflash as requiring SMM access. This + * prevents the guest OS from writing to pflash directly, ensuring all + * variable updates go through the firmware's validation checks. Without + * this, secure boot keys could be overwritten by the OS. */ + if (ARCHITECTURE_SUPPORTS_SMM) { + r = qemu_config_section(config_file, "global", /* id= */ NULL, + "driver", "cfi.pflash01", + "property", "secure", + "value", "on"); + if (r < 0) + return r; + } + + r = qemu_config_section(config_file, "drive", "ovmf-vars", + "file", state, + "if", "pflash", + "format", ovmf_config_format(ovmf_config)); + if (r < 0) + return r; + + return 0; +} + static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { _cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL; - _cleanup_free_ char *qemu_binary = NULL, *mem = NULL, *kernel = NULL; + _cleanup_free_ char *qemu_binary = NULL, *mem = NULL; _cleanup_(rm_rf_physical_and_freep) char *ssh_private_key_path = NULL, *ssh_public_key_path = NULL; _cleanup_(rm_rf_subvolume_and_freep) char *snapshot_directory = NULL; _cleanup_(release_lock_file) LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT; @@ -2115,18 +2351,20 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { use_kvm = r; } - if (arg_firmware) - r = load_ovmf_config(arg_firmware, &ovmf_config); - else - r = find_ovmf_config(arg_firmware_features_include, arg_firmware_features_exclude, &ovmf_config, /* ret_firmware_json= */ NULL); - if (r < 0) - return log_error_errno(r, "Failed to find OVMF config: %m"); + if (arg_firmware_type == FIRMWARE_UEFI) { + if (arg_firmware) + r = load_ovmf_config(arg_firmware, &ovmf_config); + else + r = find_ovmf_config(arg_firmware_features_include, arg_firmware_features_exclude, &ovmf_config, /* ret_firmware_json= */ NULL); 
+ if (r < 0) + return log_error_errno(r, "Failed to find OVMF config: %m"); - if (set_contains(arg_firmware_features_include, "secure-boot") && !ovmf_config->supports_sb) - return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), - "Secure Boot requested, but selected OVMF firmware doesn't support it."); + if (set_contains(arg_firmware_features_include, "secure-boot") && !ovmf_config->supports_sb) + return log_error_errno(SYNTHETIC_ERRNO(EMEDIUMTYPE), + "Secure Boot requested, but selected OVMF firmware doesn't support it."); - log_debug("Using OVMF firmware %s Secure Boot support.", ovmf_config->supports_sb ? "with" : "without"); + log_debug("Using OVMF firmware %s Secure Boot support.", ovmf_config->supports_sb ? "with" : "without"); + } _cleanup_(machine_bind_user_context_freep) MachineBindUserContext *bind_user_context = NULL; r = machine_bind_user_prepare( @@ -2144,19 +2382,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; - if (arg_linux) { - kernel = strdup(arg_linux); - if (!kernel) - return log_oom(); - } else if (arg_directory) { - /* a kernel is required for directory type images so attempt to locate a UKI under /boot and /efi */ - r = discover_boot_entry(arg_directory, &kernel, &arg_initrds); - if (r < 0) - return log_error_errno(r, "Failed to locate UKI in directory type image, please specify one with --linux=."); - - log_debug("Discovered UKI image at %s", kernel); - } - r = find_qemu_binary(&qemu_binary); if (r == -EOPNOTSUPP) return log_error_errno(r, "Native architecture is not supported by qemu."); @@ -2207,7 +2432,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return r; - if (ARCHITECTURE_SUPPORTS_SMM) { + if (ovmf_config && ARCHITECTURE_SUPPORTS_SMM) { r = qemu_config_key(config_file, "smm", on_off(ovmf_config->supports_sb)); if (r < 0) return r; @@ -2399,7 +2624,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return r; } - bool 
use_vsock = arg_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS; + bool use_vsock = arg_vsock > 0; if (arg_vsock < 0) { r = qemu_check_vsock_support(); if (r < 0) @@ -2595,106 +2820,19 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { } } - r = qemu_config_section(config_file, "drive", "ovmf-code", - "if", "pflash", - "format", ovmf_config_format(ovmf_config), - "readonly", "on", - "file", ovmf_config->path); + _cleanup_(unlink_and_freep) char *ovmf_vars = NULL; + r = cmdline_add_ovmf(config_file, ovmf_config, &ovmf_vars); if (r < 0) return r; - if (arg_efi_nvram_state_mode == STATE_AUTO && !arg_ephemeral) { - assert(!arg_efi_nvram_state_path); - - r = make_sidecar_path(".efinvramstate", &arg_efi_nvram_state_path); - if (r < 0) - return r; - - log_debug("Storing EFI NVRAM state persistently under '%s'.", arg_efi_nvram_state_path); - } - - _cleanup_(unlink_and_freep) char *ovmf_vars = NULL; - if (ovmf_config->vars || arg_efi_nvram_template) { - const char *vars_source = arg_efi_nvram_template ?: ovmf_config->vars; - _cleanup_close_ int target_fd = -EBADF; - _cleanup_(unlink_and_freep) char *destroy_path = NULL; - bool newly_created; - const char *state; - if (arg_efi_nvram_state_path) { - _cleanup_free_ char *d = strdup(arg_efi_nvram_state_path); - if (!d) - return log_oom(); - - target_fd = openat_report_new(AT_FDCWD, arg_efi_nvram_state_path, O_WRONLY|O_CREAT|O_CLOEXEC, 0600, &newly_created); - if (target_fd < 0) - return log_error_errno(target_fd, "Failed to open file for OVMF vars at %s: %m", arg_efi_nvram_state_path); - - if (newly_created) - destroy_path = TAKE_PTR(d); - - r = fd_verify_regular(target_fd); - if (r < 0) - return log_error_errno(r, "Not a regular file for OVMF variables at %s: %m", arg_efi_nvram_state_path); - - state = arg_efi_nvram_state_path; - } else { - _cleanup_free_ char *t = NULL; - r = tempfn_random_child(/* p= */ NULL, "vmspawn-", &t); - if (r < 0) - return log_error_errno(r, "Failed to create temporary filename: 
%m"); - - target_fd = open(t, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, 0600); - if (target_fd < 0) - return log_error_errno(errno, "Failed to create regular file for OVMF vars at %s: %m", t); - - newly_created = true; - state = ovmf_vars = TAKE_PTR(t); - } - - if (newly_created) { - _cleanup_close_ int source_fd = open(vars_source, O_RDONLY|O_CLOEXEC); - if (source_fd < 0) - return log_error_errno(errno, "Failed to open OVMF vars file %s: %m", vars_source); - - r = copy_bytes(source_fd, target_fd, UINT64_MAX, COPY_REFLINK); - if (r < 0) - return log_error_errno(r, "Failed to copy bytes from %s to %s: %m", vars_source, state); - - /* This isn't always available so don't raise an error if it fails */ - (void) copy_times(source_fd, target_fd, 0); - } - - destroy_path = mfree(destroy_path); /* disarm auto-destroy */ - - /* Mark the UEFI variable store pflash as requiring SMM access. This - * prevents the guest OS from writing to pflash directly, ensuring all - * variable updates go through the firmware's validation checks. Without - * this, secure boot keys could be overwritten by the OS. */ - if (ARCHITECTURE_SUPPORTS_SMM) { - r = qemu_config_section(config_file, "global", /* id= */ NULL, - "driver", "cfi.pflash01", - "property", "secure", - "value", "on"); - if (r < 0) - return r; - } - - r = qemu_config_section(config_file, "drive", "ovmf-vars", - "file", state, - "if", "pflash", - "format", ovmf_config_format(ovmf_config)); - if (r < 0) - return r; - } - - if (kernel) { - r = strv_extend_many(&cmdline, "-kernel", kernel); + if (arg_linux) { + r = strv_extend_many(&cmdline, "-kernel", arg_linux); if (r < 0) return log_oom(); /* We can't rely on gpt-auto-generator when direct kernel booting so synthesize a root= * kernel argument instead. 
*/ - if (arg_image) { + if (arg_linux_image_type != KERNEL_IMAGE_TYPE_UKI && arg_image) { r = kernel_cmdline_maybe_append_root(); if (r < 0) return r; @@ -3074,15 +3212,11 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { } _cleanup_(rm_rf_physical_and_freep) char *smbios_dir = NULL; - r = mkdtemp_malloc("/var/tmp/vmspawn-smbios-XXXXXX", &smbios_dir); - if (r < 0) - return log_error_errno(r, "Failed to create temporary directory: %m"); - - r = cmdline_add_kernel_cmdline(&cmdline, kernel, smbios_dir); - if (r < 0) - return r; + _cleanup_close_ int smbios_dir_fd = mkdtemp_open("/var/tmp/vmspawn-smbios-XXXXXX", /* flags= */ 0, &smbios_dir); + if (smbios_dir_fd < 0) + return log_error_errno(smbios_dir_fd, "Failed to create temporary directory: %m"); - r = cmdline_add_smbios11(&cmdline, smbios_dir); + r = cmdline_add_smbios11(&cmdline, smbios_dir_fd, smbios_dir); if (r < 0) return r; @@ -3284,47 +3418,24 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return log_error_errno(r, "Failed to set credential systemd.unit-dropin.sshd-vsock@.service: %m"); } - if (ARCHITECTURE_SUPPORTS_SMBIOS) - FOREACH_ARRAY(cred, arg_credentials.credentials, arg_credentials.n_credentials) { - _cleanup_free_ char *p = NULL, *cred_data_b64 = NULL; - ssize_t n; - - n = base64mem(cred->data, cred->size, &cred_data_b64); - if (n < 0) - return log_oom(); - - p = path_join(smbios_dir, cred->id); - if (!p) - return log_oom(); - - r = write_string_filef( - p, - WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE|WRITE_STRING_FILE_MODE_0600, - "io.systemd.credential.binary:%s=%s", cred->id, cred_data_b64); - if (r < 0) - return log_error_errno(r, "Failed to write smbios credential file %s: %m", p); - - r = strv_extend(&cmdline, "-smbios"); - if (r < 0) - return log_oom(); - - r = strv_extend_joined(&cmdline, "type=11,path=", p); - if (r < 0) - return log_oom(); - } - if (use_vsock) { notify_sock_fd = open_vsock(); if (notify_sock_fd < 0) return 
log_error_errno(notify_sock_fd, "Failed to open VSOCK: %m"); - r = cmdline_add_vsock(&cmdline, notify_sock_fd); - if (r == -ENOMEM) - return log_oom(); + r = add_vsock_credential(notify_sock_fd); if (r < 0) - return log_error_errno(r, "Failed to call getsockname on VSOCK: %m"); + return log_error_errno(r, "Failed to add VSOCK credential: %m"); } + r = cmdline_add_credentials(&cmdline, smbios_dir_fd, smbios_dir); + if (r < 0) + return r; + + r = cmdline_add_kernel_cmdline(&cmdline, smbios_dir_fd, smbios_dir); + if (r < 0) + return r; + /* Finalize the config file and add -readconfig to the cmdline */ r = fflush_and_check(config_file); if (r < 0) @@ -3651,10 +3762,56 @@ static int determine_names(void) { return 0; } +static int determine_kernel(void) { + int r; + + if (!arg_linux && arg_directory) { + /* A kernel is required for directory type images so attempt to find one under /boot and /efi */ + r = discover_boot_entry(arg_directory, &arg_linux, &arg_initrds); + if (r < 0) + return log_error_errno(r, "Failed to locate UKI in directory type image, please specify one with --linux=."); + + log_debug("Discovered UKI image at %s", arg_linux); + } + + if (!arg_linux) { + if (arg_firmware_type == _FIRMWARE_INVALID) + arg_firmware_type = FIRMWARE_UEFI; + return 0; + } + + r = inspect_kernel(AT_FDCWD, arg_linux, &arg_linux_image_type); + if (r < 0) + return log_error_errno(r, "Failed to determine '%s' kernel image type: %m", arg_linux); + + if (arg_linux_image_type == KERNEL_IMAGE_TYPE_UNKNOWN) { + if (arg_firmware_type == FIRMWARE_UEFI) + return log_error_errno( + SYNTHETIC_ERRNO(EINVAL), + "Kernel image '%s' is not a PE binary, --firmware=uefi (or a firmware path) is not supported.", + arg_linux); + if (arg_firmware_type == _FIRMWARE_INVALID) + arg_firmware_type = FIRMWARE_NONE; + } + + if (arg_firmware_type == _FIRMWARE_INVALID) + arg_firmware_type = FIRMWARE_UEFI; + + return 0; +} + static int verify_arguments(void) { if (!strv_isempty(arg_initrds) && !arg_linux) 
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Option --initrd= cannot be used without --linux=."); + if (arg_firmware_type != FIRMWARE_UEFI && arg_linux_image_type == KERNEL_IMAGE_TYPE_UKI) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Booting a UKI requires --firmware=uefi."); + + if (arg_firmware_type == FIRMWARE_NONE && !arg_linux) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "--firmware=none requires --linux= to be specified."); + if (arg_image_disk_type == DISK_TYPE_VIRTIO_SCSI_CDROM) { if (arg_ephemeral) log_warning("--ephemeral has no effect with --image-disk-type=scsi-cd (CD-ROMs are read-only)."); @@ -3702,6 +3859,10 @@ static int run(int argc, char *argv[]) { if (r < 0) return r; + r = determine_kernel(); + if (r < 0) + return r; + r = verify_arguments(); if (r < 0) return r;