From: Sam Leonard Date: Wed, 24 Jan 2024 15:02:06 +0000 (+0000) Subject: vmspawn: add support for -D/--directory X-Git-Tag: v256-rc1~899^2~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5c57a8650630304d8434a348283b6b643fbc676b;p=thirdparty%2Fsystemd.git vmspawn: add support for -D/--directory --- diff --git a/src/vmspawn/vmspawn-util.h b/src/vmspawn/vmspawn-util.h index e1ce7806711..90cb97ec005 100644 --- a/src/vmspawn/vmspawn-util.h +++ b/src/vmspawn/vmspawn-util.h @@ -16,6 +16,12 @@ #define ARCHITECTURE_SUPPORTS_TPM 0 #endif +#if defined(__x86_64__) || defined(__i386__) +#define ARCHITECTURE_SUPPORTS_SMM 1 +#else +#define ARCHITECTURE_SUPPORTS_SMM 0 +#endif + #if defined(__arm__) || defined(__aarch64__) #define DEFAULT_SERIAL_TTY "ttyAMA0" #elif defined(__s390__) || defined(__s390x__) @@ -26,6 +32,18 @@ #define DEFAULT_SERIAL_TTY "ttyS0" #endif +#if defined(__x86_64__) || defined(__i386__) +#define QEMU_MACHINE_TYPE "q35" +#elif defined(__arm__) || defined(__aarch64__) +#define QEMU_MACHINE_TYPE "virt" +#elif defined(__s390__) || defined(__s390x__) +#define QEMU_MACHINE_TYPE "s390-ccw-virtio" +#elif defined(__powerpc__) || defined(__powerpc64__) +#define QEMU_MACHINE_TYPE "pseries" +#else +#error "No qemu machine defined for this architecture" +#endif + typedef struct OvmfConfig { char *path; char *format; diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c index d5ab2a1a270..89dd1964275 100644 --- a/src/vmspawn/vmspawn.c +++ b/src/vmspawn/vmspawn.c @@ -38,6 +38,7 @@ #include "path-util.h" #include "pretty-print.h" #include "process-util.h" +#include "random-util.h" #include "rm-rf.h" #include "signal-util.h" #include "socket-util.h" @@ -51,6 +52,7 @@ static bool arg_quiet = false; static PagerFlags arg_pager_flags = 0; +static char *arg_directory = NULL; static char *arg_image = NULL; static char *arg_machine = NULL; static char *arg_qemu_smp = NULL; @@ -72,6 +74,7 @@ static bool arg_runtime_directory_created = false; static bool arg_privileged = false; static char **arg_kernel_cmdline_extra = NULL; +STATIC_DESTRUCTOR_REGISTER(arg_directory, freep); STATIC_DESTRUCTOR_REGISTER(arg_image, freep); STATIC_DESTRUCTOR_REGISTER(arg_machine, freep); STATIC_DESTRUCTOR_REGISTER(arg_qemu_smp, freep); @@ -99,6 +102,7 @@ static int help(void) { " -q --quiet Do not show status information\n" " --no-pager Do not pipe output into a pager\n" "\n%3$sImage:%4$s\n" + " -D --directory=PATH Root directory for the container\n" " -i --image=PATH Root file system disk image (or device node) for\n" " the virtual machine\n" "\n%3$sHost Configuration:%4$s\n" @@ -162,6 +166,7 @@ static int parse_argv(int argc, char *argv[]) { { "quiet", no_argument, NULL, 'q' }, { "no-pager", no_argument, NULL, ARG_NO_PAGER }, { "image", required_argument, NULL, 'i' }, + { "directory", required_argument, NULL, 'D' }, { "machine", required_argument, NULL, 'M' }, { "qemu-smp", required_argument, NULL, ARG_QEMU_SMP }, { "qemu-mem", required_argument, NULL, ARG_QEMU_MEM }, @@ -187,7 +192,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argv); optind = 0; - while ((c = getopt_long(argc, argv, "+hi:M:nq", options, NULL)) >= 0) + while ((c = getopt_long(argc, argv, "+hD:i:M:nq", options, NULL)) >= 0) switch (c) { case 'h': return help(); @@ -199,6 +204,14 @@ static int parse_argv(int argc, char *argv[]) { arg_quiet = true; break; + case 'D': + r = parse_path_argument(optarg, false, &arg_directory); + if (r < 0) + return r; + + arg_settings_mask |= SETTING_DIRECTORY; + break; + case 'i': r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image); if (r < 0) @@ -629,6 +642,94 @@ static int discover_root(char **ret) { return log_oom(); *ret = TAKE_PTR(root); + return 0; +} + +static int find_virtiofsd(char **ret) { + int r; + _cleanup_free_ char *virtiofsd = NULL; + + assert(ret); + + r = find_executable("virtiofsd", &virtiofsd); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Error while searching for virtiofsd: %m"); + + if (!virtiofsd) { + FOREACH_STRING(file, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") { + if (access(file, X_OK) >= 0) { + virtiofsd = strdup(file); + if (!virtiofsd) + return log_oom(); + break; + } + + if (!IN_SET(errno, ENOENT, EACCES)) + return log_error_errno(errno, "Error while searching for virtiofsd: %m"); + } + } + + if (!virtiofsd) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to find virtiofsd binary."); + + *ret = TAKE_PTR(virtiofsd); + return 0; +} + +static int start_virtiofsd(sd_bus *bus, const char *scope, const char *directory, char **ret_state_tempdir, char **ret_sock_name) { + _cleanup_(rm_rf_physical_and_freep) char *state_dir = NULL; + _cleanup_free_ char *virtiofsd = NULL, *sock_name = NULL, *scope_prefix = NULL; + _cleanup_(socket_service_pair_done) SocketServicePair ssp = { + .socket_type = SOCK_STREAM, + }; + static unsigned virtiofsd_instance = 0; + int r; + + assert(bus); + assert(scope); + assert(directory); + assert(ret_state_tempdir); + assert(ret_sock_name); + + r = find_virtiofsd(&virtiofsd); + if (r < 0) + return r; + + r = unit_name_to_prefix(scope, &scope_prefix); + if (r < 0) + return log_error_errno(r, "Failed to strip .scope suffix from scope: %m"); + + if (asprintf(&ssp.unit_name_prefix, "%s-virtiofsd-%u", scope_prefix, virtiofsd_instance++) < 0) + return log_oom(); + + state_dir = path_join(arg_runtime_directory, ssp.unit_name_prefix); + if (!state_dir) + return log_oom(); + + if (arg_runtime_directory_created) { + ssp.runtime_directory = strjoin("systemd/vmspawn/", ssp.unit_name_prefix); + if (!ssp.runtime_directory) + return log_oom(); + } + + if (asprintf(&sock_name, "sock-%"PRIx64, random_u64()) < 0) + return log_oom(); + + ssp.listen_address = path_join(state_dir, sock_name); + if (!ssp.listen_address) + return log_oom(); + + /* QEMU doesn't support submounts so don't announce them */ + ssp.exec_start = strv_new(virtiofsd, "--shared-dir", directory, "--xattr", "--fd", "3", "--no-announce-submounts"); + if (!ssp.exec_start) + return log_oom(); + + r = start_socket_service_pair(bus, scope, &ssp); + if (r < 0) + return r; + + *ret_state_tempdir = TAKE_PTR(state_dir); + *ret_sock_name = TAKE_PTR(sock_name); return 0; } @@ -658,12 +759,12 @@ static int kernel_cmdline_maybe_append_root(void) { static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { _cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL; _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL; - _cleanup_free_ char *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL; + _cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL, *kernel = NULL; _cleanup_close_ int notify_sock_fd = -EBADF; _cleanup_strv_free_ char **cmdline = NULL; _cleanup_free_ int *pass_fds = NULL; size_t n_pass_fds = 0; - const char *machine, *accel; + const char *accel, *shm; int r; if (arg_privileged) @@ -697,18 +798,28 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { log_warning("Couldn't find OVMF firmware blob with Secure Boot support, " "falling back to OVMF firmware blobs without Secure Boot support."); + shm = arg_directory ? ",memory-backend=mem" : ""; + if (ARCHITECTURE_SUPPORTS_SMM) + machine = strjoin("type=" QEMU_MACHINE_TYPE ",smm=", on_off(ovmf_config->supports_sb), shm); + else + machine = strjoin("type=" QEMU_MACHINE_TYPE, shm); + if (!machine) + return log_oom(); + + if (arg_linux) { + kernel = strdup(arg_linux); + if (!kernel) + return log_oom(); + } else if (arg_directory) + return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Please specify a kernel (--linux=) when using -D/--directory=, refusing."); + r = find_qemu_binary(&qemu_binary); if (r == -EOPNOTSUPP) return log_error_errno(r, "Native architecture is not supported by qemu."); if (r < 0) return log_error_errno(r, "Failed to find QEMU binary: %m"); - if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE)) - machine = "type=virt"; - else - machine = ovmf_config->supports_sb ? "type=q35,smm=on" : "type=q35,smm=off"; - - if (asprintf(&mem, "%" PRIu64, DIV_ROUND_UP(arg_qemu_mem, U64_MB)) < 0) + if (asprintf(&mem, "%" PRIu64 "M", DIV_ROUND_UP(arg_qemu_mem, U64_MB)) < 0) return log_oom(); cmdline = strv_new( @@ -723,7 +834,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return log_oom(); /* if we are going to be starting any units with state then create our runtime dir */ - if (arg_tpm != 0) { + if (arg_tpm != 0 || arg_directory) { r = runtime_directory(&arg_runtime_directory, arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, "systemd/vmspawn"); if (r < 0) return log_error_errno(r, "Failed to lookup runtime directory: %m"); @@ -745,6 +856,26 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return log_oom(); + /* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */ + if (arg_directory) { + r = strv_extend(&cmdline, "-object"); + if (r < 0) + return log_oom(); + + r = strv_extendf(&cmdline, "memory-backend-memfd,id=mem,size=%s,share=on", mem); + if (r < 0) + return log_oom(); + } + + bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS; + if (arg_qemu_vsock < 0) { + r = qemu_check_vsock_support(); + if (r < 0) + return log_error_errno(r, "Failed to check for VSock support: %m"); + + use_vsock = r; + } + if (!use_kvm && kvm_device_fd >= 0) { log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring"); kvm_device_fd = safe_close(kvm_device_fd); @@ -776,15 +907,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (r < 0) return log_oom(); - bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS; - if (arg_qemu_vsock < 0) { - r = qemu_check_vsock_support(); - if (r < 0) - return log_error_errno(r, "Failed to check for VSock support: %m"); - - use_vsock = r; - } - _cleanup_close_ int child_vsock_fd = -EBADF; if (use_vsock) { int device_fd = vhost_device_fd; @@ -901,8 +1023,8 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { return log_oom(); } - if (arg_linux) { - r = strv_extend_many(&cmdline, "-kernel", arg_linux); + if (kernel) { + r = strv_extend_many(&cmdline, "-kernel", kernel); if (r < 0) return log_oom(); @@ -915,20 +1037,50 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { } } - r = strv_extend(&cmdline, "-drive"); - if (r < 0) - return log_oom(); + if (arg_image) { + assert(!arg_directory); - r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image); - if (r < 0) - return log_oom(); + r = strv_extend(&cmdline, "-drive"); + if (r < 0) + return log_oom(); - r = strv_extend_many( - &cmdline, + r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image); + if (r < 0) + return log_oom(); + + r = strv_extend_many(&cmdline, "-device", "virtio-scsi-pci,id=scsi", "-device", "scsi-hd,drive=mkosi,bootindex=1"); - if (r < 0) - return log_oom(); + if (r < 0) + return log_oom(); + } + + if (arg_directory) { + _cleanup_free_ char *sock_path = NULL, *sock_name = NULL; + r = start_virtiofsd(bus, trans_scope, arg_directory, &sock_path, &sock_name); + if (r < 0) + return r; + + r = strv_extend(&cmdline, "-chardev"); + if (r < 0) + return log_oom(); + + r = strv_extendf(&cmdline, "socket,id=%1$s,path=%2$s/%1$s", sock_name, sock_path); + if (r < 0) + return log_oom(); + + r = strv_extend(&cmdline, "-device"); + if (r < 0) + return log_oom(); + + r = strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=root", sock_name); + if (r < 0) + return log_oom(); + + r = strv_extend(&arg_kernel_cmdline_extra, "root=root rootfstype=virtiofs rw"); + if (r < 0) + return log_oom(); + } r = strv_prepend(&arg_kernel_cmdline_extra, "console=" DEFAULT_SERIAL_TTY); if (r < 0) @@ -939,7 +1091,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { if (!kcl) return log_oom(); - if (arg_linux) { + if (kernel) { r = strv_extend_many(&cmdline, "-append", kcl); if (r < 0) return log_oom(); @@ -1106,20 +1258,30 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) { static int determine_names(void) { int r; - if (!arg_image) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing required argument -i/--image=, quitting"); + if (!arg_directory && !arg_image) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine path, please use -D or -i."); if (!arg_machine) { - char *e; + if (arg_directory && path_equal(arg_directory, "/")) { + arg_machine = gethostname_malloc(); + if (!arg_machine) + return log_oom(); + } else if (arg_image) { + char *e; - r = path_extract_filename(arg_image, &arg_machine); - if (r < 0) - return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image); + r = path_extract_filename(arg_image, &arg_machine); + if (r < 0) + return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image); - /* Truncate suffix if there is one */ - e = endswith(arg_machine, ".raw"); - if (e) - *e = 0; + /* Truncate suffix if there is one */ + e = endswith(arg_machine, ".raw"); + if (e) + *e = 0; + } else { + r = path_extract_filename(arg_directory, &arg_machine); + if (r < 0) + return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_directory); + } hostname_cleanup(arg_machine); if (!hostname_is_valid(arg_machine, 0)) @@ -1158,11 +1320,12 @@ static int run(int argc, char *argv[]) { if (!arg_quiet) { _cleanup_free_ char *u = NULL; - (void) terminal_urlify_path(arg_image, arg_image, &u); + const char *vm_path = arg_image ?: arg_directory; + (void) terminal_urlify_path(vm_path, vm_path, &u); log_info("%s %sSpawning VM %s on %s.%s\n" "%s %sPress %sCtrl-a x%s to kill VM.%s", - special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: arg_image, ansi_normal(), + special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: vm_path, ansi_normal(), special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal()); }