@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s RootVerity = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly b RootEphemeral = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as ExtensionDirectories = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(sba(ss)) ExtensionImages = [...];
<!--property RootHashSignaturePath is not documented!-->
+ <!--property RootEphemeral is not documented!-->
+
<!--property OOMScoreAdjust is not documented!-->
<!--property CoredumpFilter is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RootVerity"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="RootEphemeral"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="ExtensionDirectories"/>
<variablelist class="dbus-property" generated="True" extra-ref="ExtensionImages"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s RootVerity = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly b RootEphemeral = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as ExtensionDirectories = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(sba(ss)) ExtensionImages = [...];
<!--property RootHashSignaturePath is not documented!-->
+ <!--property RootEphemeral is not documented!-->
+
<!--property OOMScoreAdjust is not documented!-->
<!--property CoredumpFilter is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RootVerity"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="RootEphemeral"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="ExtensionDirectories"/>
<variablelist class="dbus-property" generated="True" extra-ref="ExtensionImages"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s RootVerity = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly b RootEphemeral = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as ExtensionDirectories = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(sba(ss)) ExtensionImages = [...];
<!--property RootHashSignaturePath is not documented!-->
+ <!--property RootEphemeral is not documented!-->
+
<!--property OOMScoreAdjust is not documented!-->
<!--property CoredumpFilter is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RootVerity"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="RootEphemeral"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="ExtensionDirectories"/>
<variablelist class="dbus-property" generated="True" extra-ref="ExtensionImages"/>
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly s RootVerity = '...';
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+ readonly b RootEphemeral = ...;
+ @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly as ExtensionDirectories = ['...', ...];
@org.freedesktop.DBus.Property.EmitsChangedSignal("const")
readonly a(sba(ss)) ExtensionImages = [...];
<!--property RootHashSignaturePath is not documented!-->
+ <!--property RootEphemeral is not documented!-->
+
<!--property OOMScoreAdjust is not documented!-->
<!--property CoredumpFilter is not documented!-->
<variablelist class="dbus-property" generated="True" extra-ref="RootVerity"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="RootEphemeral"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="ExtensionDirectories"/>
<variablelist class="dbus-property" generated="True" extra-ref="ExtensionImages"/>
#include <unistd.h>
#include <utmpx.h>
+#include <linux/fs.h> /* Must be included after <sys/mount.h> */
+
#if HAVE_PAM
#include <security/pam_appl.h>
#endif
#include "async.h"
#include "barrier.h"
#include "bpf-lsm.h"
+#include "btrfs-util.h"
#include "cap-list.h"
#include "capability-util.h"
+#include "chattr-util.h"
#include "cgroup-setup.h"
#include "chase.h"
#include "chown-recursive.h"
#include "io-util.h"
#include "ioprio-util.h"
#include "label-util.h"
+#include "lock-util.h"
#include "log.h"
#include "macro.h"
#include "manager.h"
return context->private_network || context->network_namespace_path;
}
+/* Tells whether an ephemeral copy of the root is wanted: root_ephemeral must be set and either a root
+ * image or a root directory must be configured. */
+static bool exec_needs_ephemeral(const ExecContext *context) {
+        if (!context->root_ephemeral)
+                return false;
+
+        return context->root_image || context->root_directory;
+}
+
static bool exec_needs_ipc_namespace(const ExecContext *context) {
assert(context);
return false;
}
+/* Creates the ephemeral copy of the root image or root directory at runtime->ephemeral_copy, unless
+ * that has already been done.
+ *
+ * A file descriptor queued on runtime->ephemeral_storage_socket serves as the "already set up" marker:
+ * if one can be peeked, nothing is done; otherwise the copy/snapshot is made and its fd is queued.
+ *
+ * Returns 0 if there is nothing to do (no runtime, no ephemeral copy path, or already set up), 1 if the
+ * copy was created by this call, and a negative errno-style value on failure. */
+static int setup_ephemeral(const ExecContext *context, ExecRuntime *runtime) {
+        _cleanup_close_ int fd = -EBADF;
+        int r;
+
+        if (!runtime || !runtime->ephemeral_copy)
+                return 0;
+
+        /* Take an exclusive lock on the receiving end so that the peek-then-create sequence below is
+         * not run concurrently by another holder of the same socket pair. */
+        r = posix_lock(runtime->ephemeral_storage_socket[0], LOCK_EX);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to lock ephemeral storage socket: %m");
+
+        CLEANUP_POSIX_UNLOCK(runtime->ephemeral_storage_socket[0]);
+
+        /* MSG_PEEK: only look at the queue, so that the marker fd stays queued for later callers. */
+        fd = receive_one_fd(runtime->ephemeral_storage_socket[0], MSG_PEEK|MSG_DONTWAIT);
+        if (fd >= 0)
+                /* We got an fd! That means ephemeral has already been set up, so nothing to do here. */
+                return 0;
+
+        /* -EAGAIN means nothing is queued yet, i.e. we have to create the copy. Anything else is a
+         * real error. */
+        if (fd != -EAGAIN)
+                return log_debug_errno(fd, "Failed to receive file descriptor queued on ephemeral storage socket: %m");
+
+        log_debug("Making ephemeral snapshot of %s to %s",
+                  context->root_image ?: context->root_directory, runtime->ephemeral_copy);
+
+        if (context->root_image)
+                fd = copy_file(context->root_image, runtime->ephemeral_copy, O_EXCL, 0600,
+                               COPY_LOCK_BSD|COPY_REFLINK|COPY_CRTIME);
+        else
+                fd = btrfs_subvol_snapshot_at(AT_FDCWD, context->root_directory,
+                                              AT_FDCWD, runtime->ephemeral_copy,
+                                              BTRFS_SNAPSHOT_FALLBACK_COPY |
+                                              BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+                                              BTRFS_SNAPSHOT_RECURSIVE |
+                                              BTRFS_SNAPSHOT_LOCK_BSD);
+        if (fd < 0)
+                return log_debug_errno(fd, "Failed to snapshot %s to %s: %m",
+                                       context->root_image ?: context->root_directory, runtime->ephemeral_copy);
+
+        if (context->root_image) {
+                /* A root image might be subject to lots of random writes so let's try to disable COW on it
+                 * which tends to not perform well in combination with lots of random writes.
+                 *
+                 * Note: btrfs actually isn't impressed by us setting the flag after making the reflink'ed
+                 * copy, but we at least want to make the intention clear.
+                 */
+                r = chattr_fd(fd, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
+                if (r < 0)
+                        /* Log the chattr error code, not the (valid) file descriptor. */
+                        log_debug_errno(r, "Failed to disable copy-on-write for %s, ignoring: %m", runtime->ephemeral_copy);
+        }
+
+        /* Queue the fd as the marker that the ephemeral copy now exists. */
+        r = send_one_fd(runtime->ephemeral_storage_socket[1], fd, MSG_DONTWAIT);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to queue file descriptor on ephemeral storage socket: %m");
+
+        return 1;
+}
+
static int verity_settings_prepare(
VeritySettings *verity,
const char *root_image,
ExecCommandFlags command_flags,
const ExecContext *context,
const ExecParameters *params,
- const ExecRuntime *runtime,
+ ExecRuntime *runtime,
const char *memory_pressure_path,
char **error_path) {
CLEANUP_ARRAY(bind_mounts, n_bind_mounts, bind_mount_free_many);
if (params->flags & EXEC_APPLY_CHROOT) {
- root_image = context->root_image;
+ r = setup_ephemeral(context, runtime);
+ if (r < 0)
+ return r;
- if (!root_image)
- root_dir = context->root_directory;
+ if (context->root_image)
+ root_image = (runtime ? runtime->ephemeral_copy : NULL) ?: context->root_image;
+ else
+ root_dir = (runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory;
}
r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
static int apply_working_directory(
const ExecContext *context,
const ExecParameters *params,
+ ExecRuntime *runtime,
const char *home,
int *exit_status) {
if (params->flags & EXEC_APPLY_CHROOT)
d = wd;
else
- d = prefix_roota(context->root_directory, wd);
+ d = prefix_roota((runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory, wd);
if (chdir(d) < 0 && !context->working_directory_missing_ok) {
*exit_status = EXIT_CHDIR;
static int apply_root_directory(
const ExecContext *context,
const ExecParameters *params,
+ ExecRuntime *runtime,
const bool needs_mount_ns,
int *exit_status) {
if (params->flags & EXEC_APPLY_CHROOT)
if (!needs_mount_ns && context->root_directory)
- if (chroot(context->root_directory) < 0) {
+ if (chroot((runtime ? runtime->ephemeral_copy : NULL) ?: context->root_directory) < 0) {
*exit_status = EXIT_CHROOT;
return -errno;
}
const int *fds, size_t n_fds) {
size_t n_dont_close = 0;
- int dont_close[n_fds + 12];
+ int dont_close[n_fds + 14];
assert(params);
n_dont_close += n_fds;
}
+ if (runtime)
+ append_socket_pair(dont_close, &n_dont_close, runtime->ephemeral_storage_socket);
+
if (runtime && runtime->shared) {
append_socket_pair(dont_close, &n_dont_close, runtime->shared->netns_storage_socket);
append_socket_pair(dont_close, &n_dont_close, runtime->shared->ipcns_storage_socket);
}
/* chroot to root directory first, before we lose the ability to chroot */
- r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
+ r = apply_root_directory(context, params, runtime, needs_mount_namespace, exit_status);
if (r < 0)
return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
/* Apply working directory here, because the working directory might be on NFS and only the user running
* this service might have the correct privilege to change to the working directory */
- r = apply_working_directory(context, params, home, exit_status);
+ r = apply_working_directory(context, params, runtime, home, exit_status);
if (r < 0)
return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
"%sUMask: %04o\n"
"%sWorkingDirectory: %s\n"
"%sRootDirectory: %s\n"
+ "%sRootEphemeral: %s\n"
"%sNonBlocking: %s\n"
"%sPrivateTmp: %s\n"
"%sPrivateDevices: %s\n"
prefix, c->umask,
prefix, empty_to_root(c->working_directory),
prefix, empty_to_root(c->root_directory),
+ prefix, yes_no(c->root_ephemeral),
prefix, yes_no(c->non_blocking),
prefix, yes_no(c->private_tmp),
prefix, yes_no(c->private_devices),
return 0;
}
-static void *remove_tmpdir_thread(void *p) {
+/* Job function handed to asynchronous_job(): p is the path to remove. The path is stored in a
+ * _cleanup_free_ variable, so this function presumably owns and frees the string. Always returns NULL. */
+static void *rm_rf_thread(void *p) {
_cleanup_free_ char *path = p;
- (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
+ /* Best effort: the result of rm_rf() is deliberately discarded. */
+ (void) rm_rf(path, REMOVE_ROOT|REMOVE_SUBVOLUME|REMOVE_PHYSICAL);
return NULL;
}
+/* Queues removal of *path through asynchronous_job() with rm_rf_thread() as the job function.
+ *
+ * Does nothing if *path is NULL or equal to RUN_SYSTEMD_EMPTY. If the job was queued successfully,
+ * *path is reset to NULL (rm_rf_thread() takes over the string); on failure a warning is logged and
+ * *path is left untouched. */
+static void asynchronous_rm_rf(char **path) {
+        int r;
+
+        assert(path);
+
+        if (!*path)
+                return;
+
+        if (streq(*path, RUN_SYSTEMD_EMPTY))
+                return;
+
+        log_debug("Spawning thread to nuke %s", *path);
+
+        r = asynchronous_job(rm_rf_thread, *path);
+        if (r < 0) {
+                log_warning_errno(r, "Failed to nuke %s: %m", *path);
+                return;
+        }
+
+        /* The job now holds the string, forget about it here. */
+        *path = NULL;
+}
+
static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt) {
if (!rt)
return NULL;
DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSharedRuntime*, exec_shared_runtime_free);
ExecSharedRuntime* exec_shared_runtime_destroy(ExecSharedRuntime *rt) {
- int r;
-
if (!rt)
return NULL;
if (rt->n_ref > 0)
return NULL;
- if (rt->tmp_dir && !streq(rt->tmp_dir, RUN_SYSTEMD_EMPTY)) {
- log_debug("Spawning thread to nuke %s", rt->tmp_dir);
-
- r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
- if (r < 0)
- log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
- else
- rt->tmp_dir = NULL;
- }
-
- if (rt->var_tmp_dir && !streq(rt->var_tmp_dir, RUN_SYSTEMD_EMPTY)) {
- log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
-
- r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
- if (r < 0)
- log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
- else
- rt->var_tmp_dir = NULL;
- }
+ /* Hand both temporary directories to asynchronous_rm_rf(), which skips unset paths and
+ * RUN_SYSTEMD_EMPTY and resets the pointer to NULL once the removal job has been queued. */
+ asynchronous_rm_rf(&rt->tmp_dir);
+ asynchronous_rm_rf(&rt->var_tmp_dir);
return exec_shared_runtime_free(rt);
}
}
}
-int exec_runtime_make(ExecSharedRuntime *shared, DynamicCreds *creds, ExecRuntime **ret) {
+int exec_runtime_make(
+ const Unit *unit,
+ const ExecContext *context,
+ ExecSharedRuntime *shared,
+ DynamicCreds *creds,
+ ExecRuntime **ret) {
+ _cleanup_close_pair_ int ephemeral_storage_socket[2] = PIPE_EBADF;
+ _cleanup_free_ char *ephemeral = NULL;
_cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
+ int r;
+ assert(unit);
+ assert(context);
assert(ret);
- if (!shared && !creds) {
+ if (!shared && !creds && !exec_needs_ephemeral(context)) {
*ret = NULL;
return 0;
}
+ if (exec_needs_ephemeral(context)) {
+ r = mkdir_p("/var/lib/systemd/ephemeral-trees", 0755);
+ if (r < 0)
+ return r;
+
+ r = tempfn_random_child("/var/lib/systemd/ephemeral-trees", unit->id, &ephemeral);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ephemeral_storage_socket) < 0)
+ return -errno;
+ }
+
rt = new(ExecRuntime, 1);
if (!rt)
return -ENOMEM;
*rt = (ExecRuntime) {
.shared = shared,
.dynamic_creds = creds,
+ .ephemeral_copy = TAKE_PTR(ephemeral),
+ .ephemeral_storage_socket[0] = TAKE_FD(ephemeral_storage_socket[0]),
+ .ephemeral_storage_socket[1] = TAKE_FD(ephemeral_storage_socket[1]),
};
*ret = TAKE_PTR(rt);
exec_shared_runtime_unref(rt->shared);
dynamic_creds_unref(rt->dynamic_creds);
+
+ asynchronous_rm_rf(&rt->ephemeral_copy);
+
+ free(rt->ephemeral_copy);
+ safe_close_pair(rt->ephemeral_storage_socket);
return mfree(rt);
}