DynamicUser= enables PrivateTmp= implicitly to avoid files owned by reusable uids
leaking into the host. Change it to instead create a fully private tmpfs instance
instead, which also ensures the same result, since it has less impactful semantics
with respect to PrivateTmp=yes, which links the mount namespace to the host's /tmp
instead. If a user specifies PrivateTmp manually, let the existing behaviour
unchanged to ensure backward compatibility is not broken.
part of a unit for which dynamic users/groups are enabled do not leave files or directories owned by
these users/groups around, as a different unit might get the same UID/GID assigned later on, and thus
gain access to these files or directories. If <varname>DynamicUser=</varname> is enabled,
- <varname>RemoveIPC=</varname> and <varname>PrivateTmp=</varname> are implied (and cannot be turned
- off). This ensures that the lifetime of IPC objects and temporary files created by the executed
- processes is bound to the runtime of the service, and hence the lifetime of the dynamic
- user/group. Since <filename>/tmp/</filename> and <filename>/var/tmp/</filename> are usually the only
- world-writable directories on a system this ensures that a unit making use of dynamic user/group
- allocation cannot leave files around after unit termination. Furthermore
+ <varname>RemoveIPC=</varname> is implied (and cannot be turned off). This ensures that the lifetime
+ of IPC objects and temporary files created by the executed processes is bound to the runtime of the
+ service, and hence the lifetime of the dynamic user/group. Since <filename>/tmp/</filename> and
+ <filename>/var/tmp/</filename> are usually the only world-writable directories on a system, unless
+ <varname>PrivateTmp=</varname> is manually enabled, those directories will be placed on a private
+ tmpfs filesystem, as this ensures that a unit making use of dynamic user/group allocation cannot
+ leave files around after unit termination. Furthermore
<varname>NoNewPrivileges=</varname> and <varname>RestrictSUIDSGID=</varname> are implicitly enabled
(and cannot be disabled), to ensure that processes invoked cannot take benefit or create SUID/SGID
files or directories. Moreover <varname>ProtectSystem=strict</varname> and
SD_BUS_PROPERTY("NoExecPaths", "as", NULL, offsetof(ExecContext, no_exec_paths), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ExecSearchPath", "as", NULL, offsetof(ExecContext, exec_search_path), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_propagation_flag), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_private_tmp, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectClock", "b", bus_property_get_bool, offsetof(ExecContext, protect_clock), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST),
return bus_set_transient_unsigned(u, name, &c->tty_cols, message, flags, error);
if (streq(name, "PrivateTmp"))
- return bus_set_transient_bool(u, name, &c->private_tmp, message, flags, error);
+ return bus_set_transient_private_tmp(u, name, &c->private_tmp, message, flags, error);
if (streq(name, "PrivateDevices"))
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
return 1;
}
+int bus_set_transient_private_tmp(
+ Unit *u,
+ const char *name,
+ PrivateTmp *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int v, r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "b", &v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = v ? PRIVATE_TMP_CONNECTED : PRIVATE_TMP_OFF;
+ unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v));
+ }
+
+ return 1;
+}
+
+int bus_property_get_private_tmp(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ PrivateTmp *p = ASSERT_PTR(userdata);
+ int b = *p != PRIVATE_TMP_OFF;
+
+ return sd_bus_message_append_basic(reply, 'b', &b);
+}
+
int bus_verify_manage_units_async_full(
Unit *u,
const char *verb,
#include "sd-bus.h"
#include "dissect-image.h"
+#include "execute.h"
#include "unit.h"
int bus_property_get_triggered_unit(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_tristate(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_private_tmp(Unit *u, const char *name, PrivateTmp *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
return bus_set_transient_usec_internal(u, name, p, false, message, flags, error);
}
int bus_read_mount_options(sd_bus_message *message, sd_bus_error *error, MountOptions **ret_options, char **ret_format_str, const char *separator);
int bus_property_get_activation_details(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
+int bus_property_get_private_tmp(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
_cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
**read_write_paths_cleanup = NULL;
_cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL,
- *extension_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
+ *private_namespace_dir = NULL, *host_os_release_stage = NULL, *root_image = NULL, *root_dir = NULL;
const char *tmp_dir = NULL, *var_tmp_dir = NULL;
char **read_write_paths;
bool setup_os_release_symlink;
* to world users. Inside of it there's a /tmp that is sticky, and that's the one we want to
* use here. This does not apply when we are using /run/systemd/empty as fallback. */
- if (context->private_tmp && runtime && runtime->shared) {
+ if (context->private_tmp == PRIVATE_TMP_CONNECTED && runtime && runtime->shared) {
if (streq_ptr(runtime->shared->tmp_dir, RUN_SYSTEMD_EMPTY))
tmp_dir = runtime->shared->tmp_dir;
else if (runtime->shared->tmp_dir)
if (!incoming_dir)
return -ENOMEM;
- extension_dir = strdup("/run/systemd/unit-extensions");
- if (!extension_dir)
+ private_namespace_dir = strdup("/run/systemd");
+ if (!private_namespace_dir)
return -ENOMEM;
/* If running under a different root filesystem, propagate the host's os-release. We make a
} else {
assert(params->runtime_scope == RUNTIME_SCOPE_USER);
- if (asprintf(&extension_dir, "/run/user/" UID_FMT "/systemd/unit-extensions", geteuid()) < 0)
+ if (asprintf(&private_namespace_dir, "/run/user/" UID_FMT "/systemd", geteuid()) < 0)
return -ENOMEM;
if (setup_os_release_symlink) {
.temporary_filesystems = context->temporary_filesystems,
.n_temporary_filesystems = context->n_temporary_filesystems,
+ .private_tmp = context->private_tmp,
+
.mount_images = context->mount_images,
.n_mount_images = context->n_mount_images,
.mount_image_policy = context->mount_image_policy ?: &image_policy_service,
.propagate_dir = propagate_dir,
.incoming_dir = incoming_dir,
- .extension_dir = extension_dir,
+ .private_namespace_dir = private_namespace_dir,
.notify_socket = root_dir || root_image ? params->notify_socket : NULL,
.host_os_release_stage = host_os_release_stage,
return false;
return context->private_users ||
- context->private_tmp ||
+ context->private_tmp != PRIVATE_TMP_OFF ||
context->private_devices ||
context->private_network ||
context->network_namespace_path ||
if (r < 0)
return r;
- r = serialize_bool_elide(f, "exec-context-private-tmp", c->private_tmp);
+ r = serialize_item(f, "exec-context-private-tmp", private_tmp_to_string(c->private_tmp));
if (r < 0)
return r;
if (r < 0)
return r;
} else if ((val = startswith(l, "exec-context-private-tmp="))) {
- r = parse_boolean(val);
- if (r < 0)
- return r;
- c->private_tmp = r;
+ c->private_tmp = private_tmp_from_string(val);
+ if (c->private_tmp < 0)
+ return c->private_tmp;
} else if ((val = startswith(l, "exec-context-private-devices="))) {
r = parse_boolean(val);
if (r < 0)
if (!IN_SET(context->mount_propagation_flag, 0, MS_SHARED))
return true;
- if (context->private_tmp && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
+ if (context->private_tmp == PRIVATE_TMP_DISCONNECTED)
+ return true;
+
+ if (context->private_tmp == PRIVATE_TMP_CONNECTED && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
return true;
if (context->private_devices ||
prefix, empty_to_root(c->root_directory),
prefix, yes_no(c->root_ephemeral),
prefix, yes_no(c->non_blocking),
- prefix, yes_no(c->private_tmp),
+ prefix, private_tmp_to_string(c->private_tmp),
prefix, yes_no(c->private_devices),
prefix, yes_no(c->protect_kernel_tunables),
prefix, yes_no(c->protect_kernel_modules),
assert(id);
/* It is not necessary to create ExecSharedRuntime object. */
- if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && !c->private_tmp) {
+ if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && c->private_tmp != PRIVATE_TMP_CONNECTED) {
*ret = NULL;
return 0;
}
- if (c->private_tmp &&
+ if (c->private_tmp == PRIVATE_TMP_CONNECTED &&
!(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
(prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
int private_mounts;
int mount_apivfs;
int memory_ksm;
- bool private_tmp;
+ PrivateTmp private_tmp;
bool private_network;
bool private_devices;
bool private_users;
{{type}}.BindPaths, config_parse_bind_paths, 0, offsetof({{type}}, exec_context)
{{type}}.BindReadOnlyPaths, config_parse_bind_paths, 0, offsetof({{type}}, exec_context)
{{type}}.TemporaryFileSystem, config_parse_temporary_filesystems, 0, offsetof({{type}}, exec_context)
-{{type}}.PrivateTmp, config_parse_bool, 0, offsetof({{type}}, exec_context.private_tmp)
+{{type}}.PrivateTmp, config_parse_private_tmp, 0, offsetof({{type}}, exec_context)
{{type}}.PrivateDevices, config_parse_bool, 0, offsetof({{type}}, exec_context.private_devices)
{{type}}.ProtectKernelTunables, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_tunables)
{{type}}.ProtectKernelModules, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_modules)
}
}
+int config_parse_private_tmp(
+ const char* unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = ASSERT_PTR(data);
+ int r;
+
+ assert(filename);
+ assert(rvalue);
+
+ r = parse_boolean(rvalue);
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse boolean value: %s ignoring", rvalue);
+ return 0;
+ }
+
+ c->private_tmp = r ? PRIVATE_TMP_CONNECTED : PRIVATE_TMP_OFF;
+ return 0;
+}
+
int config_parse_bind_paths(
const char *unit,
const char *filename,
CONFIG_PARSER_PROTOTYPE(config_parse_set_status);
CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv);
CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems);
+CONFIG_PARSER_PROTOTYPE(config_parse_private_tmp);
CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota);
CONFIG_PARSER_PROTOTYPE(config_parse_allowed_cpuset);
CONFIG_PARSER_PROTOTYPE(config_parse_protect_home);
MOUNT_EXEC,
MOUNT_TMPFS,
MOUNT_RUN,
+ MOUNT_PRIVATE_TMPFS, /* Mounted outside the root directory, and used by subsequent mounts */
MOUNT_EXTENSION_DIRECTORY, /* Bind-mounted outside the root directory, and used by subsequent mounts */
MOUNT_EXTENSION_IMAGE, /* Mounted outside the root directory, and used by subsequent mounts */
MOUNT_MQUEUEFS,
bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */
bool noexec:1; /* Shall set MS_NOEXEC on the mount itself */
bool exec:1; /* Shall clear MS_NOEXEC on the mount itself */
+ bool create_source_dir:1; /* Create the source directory if it doesn't exist - for implicit bind mounts */
+ mode_t source_dir_mode; /* Mode for the source directory, if it is to be created */
MountEntryState state; /* Whether it was already processed or skipped */
char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */
const char *unprefixed_path_const; /* If the path was amended with a prefix, these will save the original */
[MOUNT_EXEC] = "exec",
[MOUNT_TMPFS] = "tmpfs",
[MOUNT_RUN] = "run",
+ [MOUNT_PRIVATE_TMPFS] = "private-tmpfs",
[MOUNT_EXTENSION_DIRECTORY] = "extension-directory",
[MOUNT_EXTENSION_IMAGE] = "extension-image",
[MOUNT_MQUEUEFS] = "mqueuefs",
static int append_extensions(
MountList *ml,
const char *root,
- const char *extension_dir,
+ const char *private_namespace_dir,
char **hierarchies,
const MountImage *mount_images,
size_t n_mount_images,
if (n_mount_images == 0 && strv_isempty(extension_directories))
return 0;
- assert(extension_dir);
+ assert(private_namespace_dir);
n_overlays = strv_length(hierarchies);
if (n_overlays == 0)
"No matching entry in .v/ directory %s found.",
m->source);
- if (asprintf(&mount_point, "%s/%zu", extension_dir, i) < 0)
+ if (asprintf(&mount_point, "%s/unit-extensions/%zu", private_namespace_dir, i) < 0)
return -ENOMEM;
for (size_t j = 0; hierarchies && hierarchies[j]; ++j) {
bool ignore_enoent = false;
/* Pick up the counter where the ExtensionImages left it. */
- if (asprintf(&mount_point, "%s/%zu", extension_dir, n_mount_images++) < 0)
+ if (asprintf(&mount_point, "%s/unit-extensions/%zu", private_namespace_dir, n_mount_images++) < 0)
return -ENOMEM;
/* Look for any prefixes */
for (f = ml->mounts, t = ml->mounts; f < ml->mounts + ml->n_mounts; f++) {
- /* ExtensionImages/Directories bases are opened in /run/systemd/unit-extensions on the host */
- if (!IN_SET(f->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY) && !path_startswith(mount_entry_path(f), root_directory)) {
+ /* ExtensionImages/Directories bases are opened in /run/[user/xyz/]systemd/unit-extensions
+ * on the host, and a private (invisible to the guest) tmpfs instance is mounted on
+ * /run/[user/xyz/]systemd/unit-private-tmp as the storage backend of private /tmp and
+ * /var/tmp. */
+ if (!IN_SET(f->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY, MOUNT_PRIVATE_TMPFS) &&
+ !path_startswith(mount_entry_path(f), root_directory)) {
log_debug("%s is outside of root directory.", mount_entry_path(f));
mount_entry_done(f);
continue;
* that bind mount source paths are always relative to the host root, hence we pass NULL as
* root directory to chase() here. */
+ /* When we create implicit mounts, we might need to create the path ourselves as it is on a
+ * just-created tmpfs, for example. */
+ if (m->create_source_dir) {
+ r = mkdir_p(mount_entry_source(m), m->source_dir_mode);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create source directory %s: %m", mount_entry_source(m));
+ }
+
r = chase(mount_entry_source(m), NULL, CHASE_TRAIL_SLASH, &chased, NULL);
if (r == -ENOENT && m->ignore) {
log_debug_errno(r, "Path %s does not exist, ignoring.", mount_entry_source(m));
}
case MOUNT_EMPTY_DIR:
+ case MOUNT_PRIVATE_TMPFS:
case MOUNT_TMPFS:
return mount_tmpfs(m);
* and running Linux <= 4.17. */
submounts =
mount_entry_read_only(m) &&
- !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
+ !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
if (submounts)
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, deny_list, proc_self_mountinfo);
else
if (flags_mask == 0) /* No Change? */
return 0;
- submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
+ submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
if (submounts)
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), new_flags, flags_mask, deny_list, proc_self_mountinfo);
if (m->state != MOUNT_APPLIED)
return 0;
- submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS);
+ submounts = !IN_SET(m->mode, MOUNT_EMPTY_DIR, MOUNT_TMPFS, MOUNT_PRIVATE_TMPFS);
if (submounts)
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), MS_NOSUID, MS_NOSUID, NULL, proc_self_mountinfo);
else
if (m->state != MOUNT_PENDING)
continue;
- /* ExtensionImages/Directories are first opened in the propagate directory, not in the root_directory */
- r = follow_symlink(!IN_SET(m->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY) ? root : NULL, m);
+ /* ExtensionImages/Directories are first opened in the propagate directory, not in
+ * the root_directory. A private (invisible to the guest) tmpfs instance is mounted
+ * on /run/[user/xyz/]systemd/unit-private-tmp as the storage backend of private
+ * /tmp and /var/tmp. */
+ r = follow_symlink(!IN_SET(m->mode, MOUNT_EXTENSION_IMAGE, MOUNT_EXTENSION_DIRECTORY, MOUNT_PRIVATE_TMPFS) ? root : NULL, m);
if (r < 0) {
mount_entry_path_debug_string(root, m, error_path);
return r;
if (r < 0)
return r;
- if (p->tmp_dir) {
- bool ro = streq(p->tmp_dir, RUN_SYSTEMD_EMPTY);
+ /* When DynamicUser=yes enforce that /tmp/ and /var/tmp/ are disconnected from the host's
+ * directories, as they are world writable and ephemeral uid/gid will be used. */
+ if (p->private_tmp == PRIVATE_TMP_DISCONNECTED) {
+ _cleanup_free_ char *tmpfs_dir = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
+ MountEntry *tmpfs_entry, *tmp_entry, *var_tmp_entry;
- MountEntry *me = mount_list_extend(&ml);
- if (!me)
+ tmpfs_dir = path_join(p->private_namespace_dir, "unit-private-tmp");
+ tmp_dir = path_join(tmpfs_dir, "tmp");
+ var_tmp_dir = path_join(tmpfs_dir, "var-tmp");
+ if (!tmpfs_dir || !tmp_dir || !var_tmp_dir)
return log_oom_debug();
- *me = (MountEntry) {
- .path_const = "/tmp",
- .mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
- .source_const = p->tmp_dir,
+ tmpfs_entry = mount_list_extend(&ml);
+ if (!tmpfs_entry)
+ return log_oom_debug();
+ *tmpfs_entry = (MountEntry) {
+ .path_malloc = TAKE_PTR(tmpfs_dir),
+ .mode = MOUNT_PRIVATE_TMPFS,
+ .options_const = "mode=0700" NESTED_TMPFS_LIMITS,
+ .flags = MS_NODEV|MS_STRICTATIME,
+ .has_prefix = true,
};
- }
- if (p->var_tmp_dir) {
- bool ro = streq(p->var_tmp_dir, RUN_SYSTEMD_EMPTY);
-
- MountEntry *me = mount_list_extend(&ml);
- if (!me)
+ tmp_entry = mount_list_extend(&ml);
+ if (!tmp_entry)
return log_oom_debug();
+ *tmp_entry = (MountEntry) {
+ .source_malloc = TAKE_PTR(tmp_dir),
+ .path_const = "/tmp",
+ .mode = MOUNT_BIND,
+ .source_dir_mode = 01777,
+ .create_source_dir = true,
+ };
- *me = (MountEntry) {
+ var_tmp_entry = mount_list_extend(&ml);
+ if (!var_tmp_entry)
+ return log_oom_debug();
+ *var_tmp_entry = (MountEntry) {
+ .source_malloc = TAKE_PTR(var_tmp_dir),
.path_const = "/var/tmp",
- .mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
- .source_const = p->var_tmp_dir,
+ .mode = MOUNT_BIND,
+ .source_dir_mode = 01777,
+ .create_source_dir = true,
};
+
+ /* Ensure that the tmpfs is mounted first, and bind mounts are added later. */
+ assert_cc(MOUNT_BIND < MOUNT_PRIVATE_TMPFS);
+ } else {
+ if (p->tmp_dir) {
+ bool ro = streq(p->tmp_dir, RUN_SYSTEMD_EMPTY);
+
+ MountEntry *me = mount_list_extend(&ml);
+ if (!me)
+ return log_oom_debug();
+
+ *me = (MountEntry) {
+ .path_const = "/tmp",
+ .mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
+ .source_const = p->tmp_dir,
+ };
+ }
+
+ if (p->var_tmp_dir) {
+ bool ro = streq(p->var_tmp_dir, RUN_SYSTEMD_EMPTY);
+
+ MountEntry *me = mount_list_extend(&ml);
+ if (!me)
+ return log_oom_debug();
+
+ *me = (MountEntry) {
+ .path_const = "/var/tmp",
+ .mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
+ .source_const = p->var_tmp_dir,
+ };
+ }
}
r = append_mount_images(&ml, p->mount_images, p->n_mount_images);
if (r < 0)
return r;
- r = append_extensions(&ml, root, p->extension_dir, hierarchies, p->extension_images, p->n_extension_images, p->extension_directories);
+ r = append_extensions(&ml, root, p->private_namespace_dir, hierarchies, p->extension_images, p->n_extension_images, p->extension_directories);
if (r < 0)
return r;
if (setup_propagate)
(void) mkdir_p(p->propagate_dir, 0600);
- if (p->n_extension_images > 0 || !strv_isempty(p->extension_directories))
+ if (p->n_extension_images > 0 || !strv_isempty(p->extension_directories)) {
/* ExtensionImages/Directories mountpoint directories will be created while parsing the
* mounts to create, so have the parent ready */
- (void) mkdir_p(p->extension_dir, 0600);
+ char *extension_dir = strjoina(p->private_namespace_dir, "/unit-extensions");
+ (void) mkdir_p(extension_dir, 0600);
+ }
/* Remount / as SLAVE so that nothing now mounted in the namespace
* shows up in the parent */
};
DEFINE_STRING_TABLE_LOOKUP(proc_subset, ProcSubset);
+
+static const char* const private_tmp_table[_PRIVATE_TMP_MAX] = {
+ [PRIVATE_TMP_OFF] = "off",
+ [PRIVATE_TMP_CONNECTED] = "connected",
+ [PRIVATE_TMP_DISCONNECTED] = "disconnected",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(private_tmp, PrivateTmp);
_PROC_SUBSET_INVALID = -EINVAL,
} ProcSubset;
+typedef enum PrivateTmp {
+ PRIVATE_TMP_OFF,
+ PRIVATE_TMP_CONNECTED, /* Bind mounted from the host's filesystem */
+ PRIVATE_TMP_DISCONNECTED, /* A completely private tmpfs, invisible from the host */
+ _PRIVATE_TMP_MAX,
+ _PRIVATE_TMP_INVALID = -EINVAL,
+} PrivateTmp;
+
struct BindMount {
char *source;
char *destination;
const char *propagate_dir;
const char *incoming_dir;
- const char *extension_dir;
+ const char *private_namespace_dir;
const char *notify_socket;
const char *host_os_release_stage;
ProtectSystem protect_system;
ProtectProc protect_proc;
ProcSubset proc_subset;
+ PrivateTmp private_tmp;
};
int setup_namespace(const NamespaceParameters *p, char **error_path);
const char* proc_subset_to_string(ProcSubset i) _const_;
ProcSubset proc_subset_from_string(const char *s) _pure_;
+const char* private_tmp_to_string(PrivateTmp i) _const_;
+PrivateTmp private_tmp_from_string(const char *s) _pure_;
+
void bind_mount_free_many(BindMount *b, size_t n);
int bind_mount_add(BindMount **b, size_t *n, const BindMount *item);
return r;
}
- if (c->private_tmp) {
+ if (c->private_tmp == PRIVATE_TMP_CONNECTED) {
r = unit_add_mounts_for(u, "/tmp", UNIT_DEPENDENCY_FILE, UNIT_MOUNT_WANTS);
if (r < 0)
return r;
* UID/GID around in the file system or on IPC objects. Hence enforce a strict
* sandbox. */
- ec->private_tmp = true;
+ /* With DynamicUser= we want private directories, so if the user hasn't manually
+ * selected PrivateTmp=, enable it, but to a fully private (disconnected) tmpfs
+ * instance. */
+ if (ec->private_tmp == PRIVATE_TMP_OFF)
+ ec->private_tmp = PRIVATE_TMP_DISCONNECTED;
ec->remove_ipc = true;
ec->protect_system = PROTECT_SYSTEM_STRICT;
if (ec->protect_home == PROTECT_HOME_NO)
(! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /sys)
fi
+if [[ ! -v ASAN_OPTIONS ]]; then
+ # Ensure DynamicUser=yes does not imply PrivateTmp=yes if TemporaryFileSystem=/tmp /var/tmp is set
+ systemd-run --unit test-07-dynamic-user-tmp.service \
+ --service-type=notify \
+ -p DynamicUser=yes \
+ -p NotifyAccess=all \
+ sh -c 'touch /tmp/a && touch /var/tmp/b && ! test -f /tmp/b && ! test -f /var/tmp/a && systemd-notify --ready && sleep infinity'
+ (! ls /tmp/systemd-private-"$(tr -d '-' < /proc/sys/kernel/random/boot_id)"-test-07-dynamic-user-tmp.service-* &>/dev/null)
+ (! ls /var/tmp/systemd-private-"$(tr -d '-' < /proc/sys/kernel/random/boot_id)"-test-07-dynamic-user-tmp.service-* &>/dev/null)
+ systemctl is-active test-07-dynamic-user-tmp.service
+ systemctl stop test-07-dynamic-user-tmp.service
+fi
+
# Make sure we properly (de)serialize various string arrays, including whitespaces
# See: https://github.com/systemd/systemd/issues/31214
systemd-run --wait --pipe -p Environment="FOO='bar4 '" \