]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
vmspawn: add support for -D/--directory
authorSam Leonard <sam.leonard@codethink.co.uk>
Wed, 24 Jan 2024 15:02:06 +0000 (15:02 +0000)
committerSam Leonard <sam.leonard@codethink.co.uk>
Mon, 12 Feb 2024 09:46:00 +0000 (09:46 +0000)
src/vmspawn/vmspawn-util.h
src/vmspawn/vmspawn.c

index e1ce78067116479439c8f5901370528f0ef60555..90cb97ec005966af403a2e69b9b7dfb9c62b36ba 100644 (file)
 #define ARCHITECTURE_SUPPORTS_TPM 0
 #endif
 
+#if defined(__x86_64__) || defined(__i386__)
+#define ARCHITECTURE_SUPPORTS_SMM 1
+#else
+#define ARCHITECTURE_SUPPORTS_SMM 0
+#endif
+
 #if defined(__arm__) || defined(__aarch64__)
 #define DEFAULT_SERIAL_TTY "ttyAMA0"
 #elif defined(__s390__) || defined(__s390x__)
 #define DEFAULT_SERIAL_TTY "ttyS0"
 #endif
 
+#if defined(__x86_64__) || defined(__i386__)
+#define QEMU_MACHINE_TYPE "q35"
+#elif defined(__arm__) || defined(__aarch64__)
+#define QEMU_MACHINE_TYPE "virt"
+#elif defined(__s390__) || defined(__s390x__)
+#define QEMU_MACHINE_TYPE "s390-ccw-virtio"
+#elif defined(__powerpc__) || defined(__powerpc64__)
+#define QEMU_MACHINE_TYPE "pseries"
+#else
+#error "No qemu machine defined for this architecture"
+#endif
+
 typedef struct OvmfConfig {
         char *path;
         char *format;
index d5ab2a1a2708a65dad732b9411b6d765a42b1a56..89dd196427576eb2b4fc9bd5e9e1c738abd3d045 100644 (file)
@@ -38,6 +38,7 @@
 #include "path-util.h"
 #include "pretty-print.h"
 #include "process-util.h"
+#include "random-util.h"
 #include "rm-rf.h"
 #include "signal-util.h"
 #include "socket-util.h"
@@ -51,6 +52,7 @@
 
 static bool arg_quiet = false;
 static PagerFlags arg_pager_flags = 0;
+static char *arg_directory = NULL;
 static char *arg_image = NULL;
 static char *arg_machine = NULL;
 static char *arg_qemu_smp = NULL;
@@ -72,6 +74,7 @@ static bool arg_runtime_directory_created = false;
 static bool arg_privileged = false;
 static char **arg_kernel_cmdline_extra = NULL;
 
+STATIC_DESTRUCTOR_REGISTER(arg_directory, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_image, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_machine, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_qemu_smp, freep);
@@ -99,6 +102,7 @@ static int help(void) {
                "  -q --quiet                Do not show status information\n"
                "     --no-pager             Do not pipe output into a pager\n"
                "\n%3$sImage:%4$s\n"
+               "  -D --directory=PATH       Root directory for the container\n"
                "  -i --image=PATH           Root file system disk image (or device node) for\n"
                "                            the virtual machine\n"
                "\n%3$sHost Configuration:%4$s\n"
@@ -162,6 +166,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "quiet",             no_argument,       NULL, 'q'                   },
                 { "no-pager",          no_argument,       NULL, ARG_NO_PAGER          },
                 { "image",             required_argument, NULL, 'i'                   },
+                { "directory",         required_argument, NULL, 'D'                   },
                 { "machine",           required_argument, NULL, 'M'                   },
                 { "qemu-smp",          required_argument, NULL, ARG_QEMU_SMP          },
                 { "qemu-mem",          required_argument, NULL, ARG_QEMU_MEM          },
@@ -187,7 +192,7 @@ static int parse_argv(int argc, char *argv[]) {
         assert(argv);
 
         optind = 0;
-        while ((c = getopt_long(argc, argv, "+hi:M:nq", options, NULL)) >= 0)
+        while ((c = getopt_long(argc, argv, "+hD:i:M:nq", options, NULL)) >= 0)
                 switch (c) {
                 case 'h':
                         return help();
@@ -199,6 +204,14 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_quiet = true;
                         break;
 
+                case 'D':
+                        r = parse_path_argument(optarg, false, &arg_directory);
+                        if (r < 0)
+                                return r;
+
+                        arg_settings_mask |= SETTING_DIRECTORY;
+                        break;
+
                 case 'i':
                         r = parse_path_argument(optarg, /* suppress_root= */ false, &arg_image);
                         if (r < 0)
@@ -629,6 +642,94 @@ static int discover_root(char **ret) {
                 return log_oom();
 
         *ret = TAKE_PTR(root);
+        return 0;
+}
+
+static int find_virtiofsd(char **ret) {
+        int r;
+        _cleanup_free_ char *virtiofsd = NULL;
+
+        assert(ret);
+
+        r = find_executable("virtiofsd", &virtiofsd);
+        if (r < 0 && r != -ENOENT)
+                return log_error_errno(r, "Error while searching for virtiofsd: %m");
+
+        if (!virtiofsd) {
+                FOREACH_STRING(file, "/usr/libexec/virtiofsd", "/usr/lib/virtiofsd") {
+                        if (access(file, X_OK) >= 0) {
+                                virtiofsd = strdup(file);
+                                if (!virtiofsd)
+                                        return log_oom();
+                                break;
+                        }
+
+                        if (!IN_SET(errno, ENOENT, EACCES))
+                                return log_error_errno(errno, "Error while searching for virtiofsd: %m");
+                }
+        }
+
+        if (!virtiofsd)
+                return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to find virtiofsd binary.");
+
+        *ret = TAKE_PTR(virtiofsd);
+        return 0;
+}
+
+static int start_virtiofsd(sd_bus *bus, const char *scope, const char *directory, char **ret_state_tempdir, char **ret_sock_name) {
+        _cleanup_(rm_rf_physical_and_freep) char *state_dir = NULL;
+        _cleanup_free_ char *virtiofsd = NULL, *sock_name = NULL, *scope_prefix = NULL;
+        _cleanup_(socket_service_pair_done) SocketServicePair ssp = {
+                .socket_type = SOCK_STREAM,
+        };
+        static unsigned virtiofsd_instance = 0;
+        int r;
+
+        assert(bus);
+        assert(scope);
+        assert(directory);
+        assert(ret_state_tempdir);
+        assert(ret_sock_name);
+
+        r = find_virtiofsd(&virtiofsd);
+        if (r < 0)
+                return r;
+
+        r = unit_name_to_prefix(scope, &scope_prefix);
+        if (r < 0)
+                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
+
+        if (asprintf(&ssp.unit_name_prefix, "%s-virtiofsd-%u", scope_prefix, virtiofsd_instance++) < 0)
+                return log_oom();
+
+        state_dir = path_join(arg_runtime_directory, ssp.unit_name_prefix);
+        if (!state_dir)
+                return log_oom();
+
+        if (arg_runtime_directory_created) {
+                ssp.runtime_directory = strjoin("systemd/vmspawn/", ssp.unit_name_prefix);
+                if (!ssp.runtime_directory)
+                        return log_oom();
+        }
+
+        if (asprintf(&sock_name, "sock-%"PRIx64, random_u64()) < 0)
+                return log_oom();
+
+        ssp.listen_address = path_join(state_dir, sock_name);
+        if (!ssp.listen_address)
+                return log_oom();
+
+        /* QEMU doesn't support submounts so don't announce them */
+        ssp.exec_start = strv_new(virtiofsd, "--shared-dir", directory, "--xattr", "--fd", "3", "--no-announce-submounts");
+        if (!ssp.exec_start)
+                return log_oom();
+
+        r = start_socket_service_pair(bus, scope, &ssp);
+        if (r < 0)
+                return r;
+
+        *ret_state_tempdir = TAKE_PTR(state_dir);
+        *ret_sock_name = TAKE_PTR(sock_name);
 
         return 0;
 }
@@ -658,12 +759,12 @@ static int kernel_cmdline_maybe_append_root(void) {
 static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
         _cleanup_(ovmf_config_freep) OvmfConfig *ovmf_config = NULL;
         _cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
-        _cleanup_free_ char *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL;
+        _cleanup_free_ char *machine = NULL, *qemu_binary = NULL, *mem = NULL, *trans_scope = NULL, *kernel = NULL;
         _cleanup_close_ int notify_sock_fd = -EBADF;
         _cleanup_strv_free_ char **cmdline = NULL;
         _cleanup_free_ int *pass_fds = NULL;
         size_t n_pass_fds = 0;
-        const char *machine, *accel;
+        const char *accel, *shm;
         int r;
 
         if (arg_privileged)
@@ -697,18 +798,28 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                 log_warning("Couldn't find OVMF firmware blob with Secure Boot support, "
                             "falling back to OVMF firmware blobs without Secure Boot support.");
 
+        shm = arg_directory ? ",memory-backend=mem" : "";
+        if (ARCHITECTURE_SUPPORTS_SMM)
+                machine = strjoin("type=" QEMU_MACHINE_TYPE ",smm=", on_off(ovmf_config->supports_sb), shm);
+        else
+                machine = strjoin("type=" QEMU_MACHINE_TYPE, shm);
+        if (!machine)
+                return log_oom();
+
+        if (arg_linux) {
+                kernel = strdup(arg_linux);
+                if (!kernel)
+                        return log_oom();
+        } else if (arg_directory)
+                return log_error_errno(SYNTHETIC_ERRNO(ENOENT), "Please specify a kernel (--linux=) when using -D/--directory=, refusing.");
+
         r = find_qemu_binary(&qemu_binary);
         if (r == -EOPNOTSUPP)
                 return log_error_errno(r, "Native architecture is not supported by qemu.");
         if (r < 0)
                 return log_error_errno(r, "Failed to find QEMU binary: %m");
 
-        if (IN_SET(native_architecture(), ARCHITECTURE_ARM64, ARCHITECTURE_ARM64_BE))
-                machine = "type=virt";
-        else
-                machine = ovmf_config->supports_sb ? "type=q35,smm=on" : "type=q35,smm=off";
-
-        if (asprintf(&mem, "%" PRIu64, DIV_ROUND_UP(arg_qemu_mem, U64_MB)) < 0)
+        if (asprintf(&mem, "%" PRIu64 "M", DIV_ROUND_UP(arg_qemu_mem, U64_MB)) < 0)
                 return log_oom();
 
         cmdline = strv_new(
@@ -723,7 +834,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                 return log_oom();
 
         /* if we are going to be starting any units with state then create our runtime dir */
-        if (arg_tpm != 0) {
+        if (arg_tpm != 0 || arg_directory) {
                 r = runtime_directory(&arg_runtime_directory, arg_privileged ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER, "systemd/vmspawn");
                 if (r < 0)
                         return log_error_errno(r, "Failed to lookup runtime directory: %m");
@@ -745,6 +856,26 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
         if (r < 0)
                 return log_oom();
 
+        /* A shared memory backend might increase ram usage so only add one if actually necessary for virtiofsd. */
+        if (arg_directory) {
+                r = strv_extend(&cmdline, "-object");
+                if (r < 0)
+                        return log_oom();
+
+                r = strv_extendf(&cmdline, "memory-backend-memfd,id=mem,size=%s,share=on", mem);
+                if (r < 0)
+                        return log_oom();
+        }
+
+        bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
+        if (arg_qemu_vsock < 0) {
+                r = qemu_check_vsock_support();
+                if (r < 0)
+                        return log_error_errno(r, "Failed to check for VSock support: %m");
+
+                use_vsock = r;
+        }
+
         if (!use_kvm && kvm_device_fd >= 0) {
                 log_warning("KVM is disabled but fd for /dev/kvm was passed, closing fd and ignoring");
                 kvm_device_fd = safe_close(kvm_device_fd);
@@ -776,15 +907,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
         if (r < 0)
                 return log_oom();
 
-        bool use_vsock = arg_qemu_vsock > 0 && ARCHITECTURE_SUPPORTS_SMBIOS;
-        if (arg_qemu_vsock < 0) {
-                r = qemu_check_vsock_support();
-                if (r < 0)
-                        return log_error_errno(r, "Failed to check for VSock support: %m");
-
-                use_vsock = r;
-        }
-
         _cleanup_close_ int child_vsock_fd = -EBADF;
         if (use_vsock) {
                 int device_fd = vhost_device_fd;
@@ -901,8 +1023,8 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                         return log_oom();
         }
 
-        if (arg_linux) {
-                r = strv_extend_many(&cmdline, "-kernel", arg_linux);
+        if (kernel) {
+                r = strv_extend_many(&cmdline, "-kernel", kernel);
                 if (r < 0)
                         return log_oom();
 
@@ -915,20 +1037,50 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                 }
         }
 
-        r = strv_extend(&cmdline, "-drive");
-        if (r < 0)
-                return log_oom();
+        if (arg_image) {
+                assert(!arg_directory);
 
-        r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image);
-        if (r < 0)
-                return log_oom();
+                r = strv_extend(&cmdline, "-drive");
+                if (r < 0)
+                        return log_oom();
 
-        r = strv_extend_many(
-                        &cmdline,
+                r = strv_extendf(&cmdline, "if=none,id=mkosi,file=%s,format=raw", arg_image);
+                if (r < 0)
+                        return log_oom();
+
+                r = strv_extend_many(&cmdline,
                         "-device", "virtio-scsi-pci,id=scsi",
                         "-device", "scsi-hd,drive=mkosi,bootindex=1");
-        if (r < 0)
-                return log_oom();
+                if (r < 0)
+                        return log_oom();
+        }
+
+        if (arg_directory) {
+                _cleanup_free_ char *sock_path = NULL, *sock_name = NULL;
+                r = start_virtiofsd(bus, trans_scope, arg_directory, &sock_path, &sock_name);
+                if (r < 0)
+                        return r;
+
+                r = strv_extend(&cmdline, "-chardev");
+                if (r < 0)
+                        return log_oom();
+
+                r = strv_extendf(&cmdline, "socket,id=%1$s,path=%2$s/%1$s", sock_name, sock_path);
+                if (r < 0)
+                        return log_oom();
+
+                r = strv_extend(&cmdline, "-device");
+                if (r < 0)
+                        return log_oom();
+
+                r = strv_extendf(&cmdline, "vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=root", sock_name);
+                if (r < 0)
+                        return log_oom();
+
+                r = strv_extend(&arg_kernel_cmdline_extra, "root=root rootfstype=virtiofs rw");
+                if (r < 0)
+                        return log_oom();
+        }
 
         r = strv_prepend(&arg_kernel_cmdline_extra, "console=" DEFAULT_SERIAL_TTY);
         if (r < 0)
@@ -939,7 +1091,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                 if (!kcl)
                         return log_oom();
 
-                if (arg_linux) {
+                if (kernel) {
                         r = strv_extend_many(&cmdline, "-append", kcl);
                         if (r < 0)
                                 return log_oom();
@@ -1106,20 +1258,30 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
 static int determine_names(void) {
         int r;
 
-        if (!arg_image)
-                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Missing required argument -i/--image=, quitting");
+        if (!arg_directory && !arg_image)
+                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine path, please use -D or -i.");
 
         if (!arg_machine) {
-                char *e;
+                if (arg_directory && path_equal(arg_directory, "/")) {
+                        arg_machine = gethostname_malloc();
+                        if (!arg_machine)
+                                return log_oom();
+                } else if (arg_image) {
+                        char *e;
 
-                r = path_extract_filename(arg_image, &arg_machine);
-                if (r < 0)
-                        return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
+                        r = path_extract_filename(arg_image, &arg_machine);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_image);
 
-                /* Truncate suffix if there is one */
-                e = endswith(arg_machine, ".raw");
-                if (e)
-                        *e = 0;
+                        /* Truncate suffix if there is one */
+                        e = endswith(arg_machine, ".raw");
+                        if (e)
+                                *e = 0;
+                } else {
+                        r = path_extract_filename(arg_directory, &arg_machine);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to extract file name from '%s': %m", arg_directory);
+                }
 
                 hostname_cleanup(arg_machine);
                 if (!hostname_is_valid(arg_machine, 0))
@@ -1158,11 +1320,12 @@ static int run(int argc, char *argv[]) {
 
         if (!arg_quiet) {
                 _cleanup_free_ char *u = NULL;
-                (void) terminal_urlify_path(arg_image, arg_image, &u);
+                const char *vm_path = arg_image ?: arg_directory;
+                (void) terminal_urlify_path(vm_path, vm_path, &u);
 
                 log_info("%s %sSpawning VM %s on %s.%s\n"
                          "%s %sPress %sCtrl-a x%s to kill VM.%s",
-                         special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: arg_image, ansi_normal(),
+                         special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), arg_machine, u ?: vm_path, ansi_normal(),
                          special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), ansi_grey(), ansi_highlight(), ansi_grey(), ansi_normal());
         }