From c2da3bf2376440526e87da6124660d22ac2079d6 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Fri, 17 Feb 2023 13:50:17 +0900 Subject: [PATCH] core/namespace: mount new sysfs when new network namespace is requested Previously, even when a mount namespace was created, the host's sysfs was still used, especially with RootDirectory= or RootImage=, so service processes could still access the properties of the network interfaces in the main network namespace through sysfs. With this change, sysfs is remounted with the new network namespace tag, except when PrivateMounts= is explicitly disabled. Hence, the properties of the network interfaces in the main network namespace cannot be accessed by service processes through sysfs. Fixes #26422. --- man/systemd.exec.xml | 8 ++++++++ src/core/execute.c | 2 ++ src/core/namespace.c | 7 +++++++ src/core/namespace.h | 1 + 4 files changed, 18 insertions(+) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index bad21ceb0c2..daa2a595f05 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1663,6 +1663,10 @@ BindReadOnlyPaths=/var/lib/systemd not available), and the unit should be written in a way that does not solely rely on this setting for security. + When this option is enabled, PrivateMounts= is implied unless it is + explicitly disabled, and /sys will be remounted to associate it with the new + network namespace. + When this option is used on a socket unit any sockets bound on behalf of this unit will be bound within a private network namespace. This may be combined with JoinsNamespaceOf= to listen on sockets inside of network namespaces of other @@ -1684,6 +1688,10 @@ BindReadOnlyPaths=/var/lib/systemd NetworkNamespacePath= configured, as otherwise the network namespace of those units is reused. + When this option is enabled, PrivateMounts= is implied unless it is + explicitly disabled, and /sys will be remounted to associate it with the new + network namespace. 
+ When this option is used on a socket unit any sockets bound on behalf of this unit will be bound within the specified network namespace. diff --git a/src/core/execute.c b/src/core/execute.c index 39ece6e7358..3971695fb66 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2075,6 +2075,7 @@ bool exec_needs_mount_namespace( if (context->private_devices || context->private_mounts > 0 || + (context->private_mounts < 0 && exec_needs_network_namespace(context)) || context->protect_system != PROTECT_SYSTEM_NO || context->protect_home != PROTECT_HOME_NO || context->protect_kernel_tunables || @@ -3606,6 +3607,7 @@ static int apply_mount_namespace( .protect_system = context->protect_system, .protect_proc = context->protect_proc, .proc_subset = context->proc_subset, + .private_network = exec_needs_network_namespace(context), .private_ipc = exec_needs_ipc_namespace(context), /* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */ .mount_nosuid = context->no_new_privileges && !mac_selinux_use(), diff --git a/src/core/namespace.c b/src/core/namespace.c index 8e3c272f40f..3b0896039bc 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1748,6 +1748,7 @@ static size_t namespace_calculate_mounts( !!log_namespace + setup_propagate + /* /run/systemd/incoming */ !!notify_socket + + ns_info->private_network + /* /sys */ ns_info->private_ipc; /* /dev/mqueue */ } @@ -2354,6 +2355,12 @@ int setup_namespace( }; } + if (ns_info->private_network) + *(m++) = (MountEntry) { + .path_const = "/sys", + .mode = PRIVATE_SYSFS, + }; + if (ns_info->private_ipc) *(m++) = (MountEntry) { .path_const = "/dev/mqueue", diff --git a/src/core/namespace.h b/src/core/namespace.h index 70e27994cc4..74f78784b68 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -61,6 +61,7 @@ struct NamespaceInfo { bool protect_kernel_logs; bool mount_apivfs; bool protect_hostname; + bool private_network; bool private_ipc; bool mount_nosuid; ProtectHome 
protect_home; -- 2.47.3