vmspawn/nspawn: Always use a per-machine runtime subdirectory

author Kai Lüke <kai@amutable.com>

Mon, 15 Jun 2026 13:11:41 +0000 (15:11 +0200)

committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>

Thu, 2 Jul 2026 15:24:18 +0000 (17:24 +0200)
author Kai Lüke <kai@amutable.com>
Mon, 15 Jun 2026 13:11:41 +0000 (15:11 +0200)
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Thu, 2 Jul 2026 15:24:18 +0000 (17:24 +0200)
diff --git a/src/libsystemd/sd-path/path-lookup.c b/src/libsystemd/sd-path/path-lookup.c

index b0f10e4b5af236fe98b4f422ebb6207959d8ebf9..ee97164bdc02abd8c010b98d9f2c787242a497c7 100644 (file)
--- a/src/libsystemd/sd-path/path-lookup.c
+++ b/src/libsystemd/sd-path/path-lookup.c
@@ -102,40 +102,52 @@ int runtime_directory(RuntimeScope scope, const char *fallback_suffix, char **re
          return 1;
  }
  
-int runtime_directory_make(RuntimeScope scope, const char *subdir, char **ret_dir, char **ret_dir_destroy) {
-        _cleanup_free_ char *dir = NULL, *destroy = NULL;
+int runtime_directory_resolve(RuntimeScope scope, const char *suffix, const char *identifier, char **ret) {
+        _cleanup_free_ char *base = NULL, *dir = NULL;
          int r;
  
-        assert(subdir);
-        assert(ret_dir);
-        assert(ret_dir_destroy);
+        assert(suffix);
+        assert(identifier);
+        assert(ret);
+
+        /* This should not be able to escape the base directory, forbid ../ and similar */
+        if (!filename_is_valid(identifier))
+                return -EINVAL;
  
-        /* Use runtime_directory() (not _generic()) so that when we run in a systemd service
-         * with RuntimeDirectory= set, we pick up $RUNTIME_DIRECTORY and place our stuff into the
-         * directory the service manager prepared for us. When the env var is unset, we fall back
-         * to the provided subdirectory under /run (or the $XDG_RUNTIME_DIR equivalent in user scope)
-         * and take care of creation and destruction ourselves. */
-        r = runtime_directory(scope, subdir, &dir);
+        /* Resolve a runtime directory <base>/<identifier>. The base is $RUNTIME_DIRECTORY when we run as a
+         * systemd service with RuntimeDirectory= set (the service manager prepared it for us), otherwise the
+         * provided suffix under /run (or the $XDG_RUNTIME_DIR equivalent in user scope). The per-identifier
+         * subdirectory keeps concurrent instances from colliding even when they share a base directory, and
+         * lets them use short entry names within it. */
+        r = runtime_directory(scope, suffix, &base);
          if (r < 0)
                  return r;
  
-        if (r > 0) {
-                /* $RUNTIME_DIRECTORY was not set, so we got the fallback path and need to create and
-                 * clean up the directory ourselves. */
-                destroy = strdup(dir);
-                if (!destroy)
-                        return -ENOMEM;
+        dir = path_join(base, identifier);
+        if (!dir)
+                return -ENOMEM;
  
-                r = mkdir_p(dir, 0755);
-                if (r < 0)
-                        return r;
-        }
+        *ret = TAKE_PTR(dir);
+        return 0;
+}
  
-        /* When $RUNTIME_DIRECTORY is set the service manager created the directory for us and
-         * will destroy it (or preserve it, per RuntimeDirectoryPreserve=) when the service stops. */
+int runtime_directory_make(RuntimeScope scope, const char *suffix, const char *identifier, char **ret) {
+        _cleanup_free_ char *dir = NULL;
+        int r;
+
+        assert(ret);
+
+        r = runtime_directory_resolve(scope, suffix, identifier, &dir);
+        if (r < 0)
+                return r;
+
+        /* We always create and destroy the subdirectory ourselves, the service manager only owns the base
+         * it handed us. The caller is expected to remove the subdirectory. */
+        r = mkdir_p(dir, 0755);
+        if (r < 0)
+                return r;
  
-        *ret_dir = TAKE_PTR(dir);
-        *ret_dir_destroy = TAKE_PTR(destroy);
+        *ret = TAKE_PTR(dir);
  
          return 0;
  }
diff --git a/src/libsystemd/sd-path/path-lookup.h b/src/libsystemd/sd-path/path-lookup.h

index 80e37a571c59c36d1705b4c027446366b183f0f2..7f5f1999a64404c4d779623535d86d49c2f8e81c 100644 (file)
--- a/src/libsystemd/sd-path/path-lookup.h
+++ b/src/libsystemd/sd-path/path-lookup.h
@@ -60,7 +60,8 @@ void lookup_paths_done(LookupPaths *p);
  int config_directory_generic(RuntimeScope scope, const char *suffix, char **ret);
  int runtime_directory_generic(RuntimeScope scope, const char *suffix, char **ret);
  int runtime_directory(RuntimeScope scope, const char *fallback_suffix, char **ret);
-int runtime_directory_make(RuntimeScope scope, const char *subdir, char **ret_dir, char **ret_dir_destroy);
+int runtime_directory_resolve(RuntimeScope scope, const char *suffix, const char *identifier, char **ret);
+int runtime_directory_make(RuntimeScope scope, const char *suffix, const char *identifier, char **ret);
  int state_directory_generic(RuntimeScope scope, const char *suffix, char **ret);
  
  /* We don't treat /etc/xdg/systemd/ in these functions as the xdg base dir spec suggests because we assume
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c

index bbd5f411d413c14a79e2775d8257daea0c5a118d..add7c9146b1c87af425eae938904f4a4ae600ba3 100644 (file)
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -6104,12 +6104,8 @@ static int do_cleanup(void) {
          if (r < 0)
                  return r;
  
-        _cleanup_free_ char *subdir = path_join("systemd/nspawn", arg_machine);
-        if (!subdir)
-                return log_oom();
-
          _cleanup_free_ char *runtime_dir = NULL;
-        r = runtime_directory(arg_runtime_scope, subdir, &runtime_dir);
+        r = runtime_directory_resolve(arg_runtime_scope, "systemd/nspawn", arg_machine, &runtime_dir);
          if (r < 0)
                  return r;
  
@@ -6133,8 +6129,7 @@ static int run(int argc, char *argv[]) {
          _cleanup_(sd_netlink_unrefp) sd_netlink *nfnl = NULL;
          _cleanup_(pidref_done) PidRef pid = PIDREF_NULL;
          _cleanup_(sd_varlink_unrefp) sd_varlink *nsresource_link = NULL, *mountfsd_link = NULL;
-        _cleanup_free_ char *runtime_dir = NULL, *subdir = NULL;
-        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir_destroy = NULL;
+        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
          _cleanup_(fork_notify_terminate) PidRef journal_remote_pidref = PIDREF_NULL;
  
          log_setup();
@@ -6629,17 +6624,11 @@ static int run(int argc, char *argv[]) {
          } else
                  assert_not_reached();
  
-        subdir = path_join("systemd/nspawn", arg_machine);
-        if (!subdir) {
-                r = log_oom();
-                goto finish;
-        }
-
          r = runtime_directory_make(
                          arg_runtime_scope,
-                        subdir,
-                        &runtime_dir,
-                        &runtime_dir_destroy);
+                        "systemd/nspawn",
+                        arg_machine,
+                        &runtime_dir);
          if (r < 0) {
                  log_error_errno(r, "Failed to create runtime directory: %m");
                  goto finish;
diff --git a/src/vmspawn/vmspawn.c b/src/vmspawn/vmspawn.c

index 00df10874ca026d929cab5cd1da94504bee05a64..f11649945036c0c8772dd46032348982a5b806c1 100644 (file)
--- a/src/vmspawn/vmspawn.c
+++ b/src/vmspawn/vmspawn.c
@@ -1563,7 +1563,6 @@ static int cmdline_add_smbios11(char ***cmdline, int smbios_dir_fd, const char *
  }
  
  static int start_tpm(
-                const char *scope,
                  const char *swtpm,
                  const char *runtime_dir,
                  const char *sd_socket_activate,
@@ -1572,30 +1571,26 @@ static int start_tpm(
  
          int r;
  
-        assert(scope);
          assert(swtpm);
          assert(runtime_dir);
          assert(sd_socket_activate);
  
-        _cleanup_free_ char *scope_prefix = NULL;
-        r = unit_name_to_prefix(scope, &scope_prefix);
-        if (r < 0)
-                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
-
          _cleanup_free_ char *listen_address = path_join(runtime_dir, "tpm.sock");
          if (!listen_address)
                  return log_oom();
  
+        /* Validate socket path length up front instead of a failure in swtpm */
+        union sockaddr_union sa;
+        r = sockaddr_un_set_path(&sa.un, listen_address);
+        if (r < 0)
+                return log_error_errno(r, "TPM socket path '%s' is too long: %m", listen_address);
+
          _cleanup_free_ char *transient_state_dir = NULL;
          const char *state_dir;
          if (arg_tpm_state_path)
                  state_dir = arg_tpm_state_path;
          else {
-                _cleanup_free_ char *dirname = strjoin(scope_prefix, "-tpm");
-                if (!dirname)
-                        return log_oom();
-
-                transient_state_dir = path_join(runtime_dir, dirname);
+                transient_state_dir = path_join(runtime_dir, "tpm");
                  if (!transient_state_dir)
                          return log_oom();
  
@@ -1694,7 +1689,6 @@ static int find_virtiofsd(char **ret) {
  }
  
  static int start_virtiofsd(
-                const char *scope,
                  const char *directory,
                  uid_t source_uid,
                  uid_t target_uid,
@@ -1705,7 +1699,6 @@ static int start_virtiofsd(
  
          int r;
  
-        assert(scope);
          assert(directory);
          assert(runtime_dir);
  
@@ -1714,11 +1707,8 @@ static int start_virtiofsd(
          if (r < 0)
                  return r;
  
-        _cleanup_free_ char *scope_prefix = NULL;
-        r = unit_name_to_prefix(scope, &scope_prefix);
-        if (r < 0)
-                return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
-
+        /* A VM may run several virtiofsd instances (one per shared dir), so the random suffix keeps their
+         * sockets distinct within the machine's runtime directory. */
          _cleanup_free_ char *listen_address = NULL;
          if (asprintf(&listen_address, "%s/sock-%"PRIx64, runtime_dir, random_u64()) < 0)
                  return log_oom();
@@ -2660,14 +2650,9 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
  
          /* Create our runtime directory. We need this for the QMP varlink control socket, the QEMU
           * config file, TPM state, virtiofsd sockets, runtime mounts, and SSH key material. */
-        _cleanup_free_ char *runtime_dir = NULL, *runtime_dir_suffix = NULL;
-        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir_destroy = NULL;
+        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
  
-        runtime_dir_suffix = path_join("systemd/vmspawn", arg_machine);
-        if (!runtime_dir_suffix)
-                return log_oom();
-
-        r = runtime_directory_make(arg_runtime_scope, runtime_dir_suffix, &runtime_dir, &runtime_dir_destroy);
+        r = runtime_directory_make(arg_runtime_scope, "systemd/vmspawn", arg_machine, &runtime_dir);
          if (r < 0)
                  return log_error_errno(r, "Failed to create runtime directory: %m");
  
@@ -3227,7 +3212,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                  }
  
                  r = start_virtiofsd(
-                                unit,
                                  arg_directory,
                                  /* source_uid= */ arg_uid_shift,
                                  /* target_uid= */ 0,
@@ -3303,7 +3287,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                          return log_oom();
  
                  r = start_virtiofsd(
-                                unit,
                                  m->source,
                                  /* source_uid= */ m->source_uid,
                                  /* target_uid= */ m->target_uid,
@@ -3425,7 +3408,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                  if (!GREEDY_REALLOC(children, n_children + 1))
                          return log_oom();
  
-                r = start_tpm(unit, swtpm, runtime_dir, sd_socket_activate, &tpm_socket_address, &child);
+                r = start_tpm(swtpm, runtime_dir, sd_socket_activate, &tpm_socket_address, &child);
                  if (r < 0) {
                          /* only bail if the user asked for a tpm */
                          if (arg_tpm > 0)
@@ -3507,14 +3490,10 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
          }
  
          if (arg_pass_ssh_key) {
-                _cleanup_free_ char *scope_prefix = NULL, *privkey_path = NULL, *pubkey_path = NULL;
+                _cleanup_free_ char *privkey_path = NULL, *pubkey_path = NULL;
                  const char *key_type = arg_ssh_key_type ?: "ed25519";
  
-                r = unit_name_to_prefix(unit, &scope_prefix);
-                if (r < 0)
-                        return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
-
-                privkey_path = strjoin(runtime_dir, "/", scope_prefix, "-", key_type);
+                privkey_path = path_join(runtime_dir, key_type);
                  if (!privkey_path)
                          return log_oom();
  
@@ -3531,7 +3510,7 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
          }
  
          if (ssh_public_key_path && ssh_private_key_path) {
-                _cleanup_free_ char *scope_prefix = NULL, *cred_path = NULL;
+                _cleanup_free_ char *cred_path = NULL;
  
                  cred_path = strjoin("ssh.ephemeral-authorized_keys-all:", ssh_public_key_path);
                  if (!cred_path)
@@ -3541,10 +3520,6 @@ static int run_virtual_machine(int kvm_device_fd, int vhost_device_fd) {
                  if (r < 0)
                          return log_error_errno(r, "Failed to load credential %s: %m", cred_path);
  
-                r = unit_name_to_prefix(unit, &scope_prefix);
-                if (r < 0)
-                        return log_error_errno(r, "Failed to strip .scope suffix from scope: %m");
-
                  /* on distros that provide their own sshd@.service file we need to provide a dropin which
                   * picks up our public key credential */
                  /* sshd reads AuthorizedKeysFile after dropping to the authenticating user's UID, so the
author	Kai Lüke <kai@amutable.com>
	Mon, 15 Jun 2026 13:11:41 +0000 (15:11 +0200)
committer	Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
	Thu, 2 Jul 2026 15:24:18 +0000 (17:24 +0200)
src/libsystemd/sd-path/path-lookup.c		patch \| blob \| blame \| history
src/libsystemd/sd-path/path-lookup.h		patch \| blob \| blame \| history
src/nspawn/nspawn.c		patch \| blob \| blame \| history
src/vmspawn/vmspawn.c		patch \| blob \| blame \| history