This new setting is supposed to be useful in most cases where
"MountFlags=slave" is currently used, i.e. as an explicit way to run a
service in its own mount namespace and decouple propagation from all
mounts of the new mount namespace towards the host.
The effect of MountFlags=slave and PrivateMounts=yes is mostly the same,
as both cause a CLONE_NEWNS namespace to be opened, and both will result
in all mounts within it to be mounted MS_SLAVE. The difference is mostly
on the conceptual/philosophical level: configuring the propagation mode
is nothing people should have to think about, in particular as the
matter is not precisely easyto grok. Moreover, MountFlags= allows configuration
of "private" and "slave" modes which don't really make much sense to use
in real-life and are quite confusing. In particular PrivateMounts=private means
mounts made on the host stay pinned for good by the service which is
particularly nasty for removable media mount. And PrivateMounts=shared
is in most ways a NOP when used a alone...
The main technical difference between setting only MountFlags=slave or
only PrivateMounts=yes in a unit file is that the former remounts all
mounts to MS_SLAVE and leaves them there, while that latter remounts
them to MS_SHARED again right after. The latter is generally a nicer
approach, since it disables propagation, while MS_SHARED is afterwards
in effect, which is really nice as that means further namespacing down
the tree will get MS_SHARED logic by default and we unify how
applications see our mounts as we always pass them as MS_SHARED
regardless whether any mount namespacing is used or not.
The effect of PrivateMounts=yes was implied already by all the other
mount namespacing options. With this new option we add an explicit knob
for it, to request it without any other option used as well.
See: #4393
SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_bool, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
if (streq(name, "PrivateDevices"))
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
if (streq(name, "PrivateDevices"))
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
+ if (streq(name, "PrivateMounts"))
+ return bus_set_transient_bool(u, name, &c->private_mounts, message, flags, error);
+
if (streq(name, "PrivateNetwork"))
return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
if (streq(name, "PrivateNetwork"))
return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
return true;
if (context->private_devices ||
return true;
if (context->private_devices ||
+ context->private_mounts ||
context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO ||
context->protect_kernel_tunables ||
context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO ||
context->protect_kernel_tunables ||
_cleanup_strv_free_ char **empty_directories = NULL;
char *tmp = NULL, *var = NULL;
const char *root_dir = NULL, *root_image = NULL;
_cleanup_strv_free_ char **empty_directories = NULL;
char *tmp = NULL, *var = NULL;
const char *root_dir = NULL, *root_image = NULL;
- NamespaceInfo ns_info = {};
bool needs_sandboxing;
BindMount *bind_mounts = NULL;
size_t n_bind_mounts = 0;
bool needs_sandboxing;
BindMount *bind_mounts = NULL;
size_t n_bind_mounts = 0;
- /*
- * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
- * sandbox info, otherwise enforce it, don't ignore protected paths and
- * fail if we are enable to apply the sandbox inside the mount namespace.
- */
- if (!context->dynamic_user && root_dir)
- ns_info.ignore_protect_paths = true;
-
needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
if (needs_sandboxing)
ns_info = (NamespaceInfo) {
.ignore_protect_paths = false,
if (needs_sandboxing)
ns_info = (NamespaceInfo) {
.ignore_protect_paths = false,
.protect_kernel_tunables = context->protect_kernel_tunables,
.protect_kernel_modules = context->protect_kernel_modules,
.mount_apivfs = context->mount_apivfs,
.protect_kernel_tunables = context->protect_kernel_tunables,
.protect_kernel_modules = context->protect_kernel_modules,
.mount_apivfs = context->mount_apivfs,
+ .private_mounts = context->private_mounts,
+ else if (!context->dynamic_user && root_dir)
+ /*
+ * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
+ * sandbox info, otherwise enforce it, don't ignore protected paths and
+ * fail if we are enable to apply the sandbox inside the mount namespace.
+ */
+ ns_info = (NamespaceInfo) {
+ .ignore_protect_paths = true,
+ };
+ else
+ ns_info = (NamespaceInfo) {};
r = setup_namespace(root_dir, root_image,
&ns_info, context->read_write_paths,
r = setup_namespace(root_dir, root_image,
&ns_info, context->read_write_paths,
bool private_network;
bool private_devices;
bool private_users;
bool private_network;
bool private_devices;
bool private_users;
ProtectSystem protect_system;
ProtectHome protect_home;
bool protect_kernel_tunables;
ProtectSystem protect_system;
ProtectHome protect_home;
bool protect_kernel_tunables;
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
+$1.PrivateMounts, config_parse_bool, 0, offsetof($1, exec_context.private_mounts)
$1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context.protect_system)
$1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context.protect_home)
$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context.mount_flags)
$1.ProtectSystem, config_parse_protect_system, 0, offsetof($1, exec_context.protect_system)
$1.ProtectHome, config_parse_protect_home, 0, offsetof($1, exec_context.protect_home)
$1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context.mount_flags)
_cleanup_free_ void *root_hash = NULL;
MountEntry *m, *mounts = NULL;
size_t root_hash_size = 0;
_cleanup_free_ void *root_hash = NULL;
MountEntry *m, *mounts = NULL;
size_t root_hash_size = 0;
- bool make_slave = false;
const char *root;
size_t n_mounts;
const char *root;
size_t n_mounts;
bool require_prefix = false;
int r = 0;
bool require_prefix = false;
int r = 0;
protect_home, protect_system);
/* Set mount slave mode */
protect_home, protect_system);
/* Set mount slave mode */
- if (root || n_mounts > 0)
- make_slave = true;
+ make_slave = root || n_mounts > 0 || ns_info->private_mounts;
if (n_mounts > 0) {
m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));
if (n_mounts > 0) {
m = mounts = (MountEntry *) alloca0(n_mounts * sizeof(MountEntry));
struct NamespaceInfo {
bool ignore_protect_paths:1;
bool private_dev:1;
struct NamespaceInfo {
bool ignore_protect_paths:1;
bool private_dev:1;
bool protect_control_groups:1;
bool protect_kernel_tunables:1;
bool protect_kernel_modules:1;
bool protect_control_groups:1;
bool protect_kernel_tunables:1;
bool protect_kernel_modules:1;
if (STR_IN_SET(field,
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
if (STR_IN_SET(field,
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
- "NoNewPrivileges", "SyslogLevelPrefix",
+ "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix",
"MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
"ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups",
"MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality"))
"MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
"ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups",
"MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality"))