From: Daan De Meyer Date: Wed, 11 Dec 2024 18:45:28 +0000 (+0000) Subject: core: Bind mount notify socket to /run/host/notify in sandboxed units X-Git-Tag: v258-rc1~1870^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F35573%2Fhead;p=thirdparty%2Fsystemd.git core: Bind mount notify socket to /run/host/notify in sandboxed units To be able to run systemd in a Type=notify transient unit, the notify socket can't be bind mounted to /run/systemd/notify as systemd in the transient unit wants to use that as its own notify socket which conflicts with systemd on the host. Instead, for sandboxed units, let's bind mount the notify socket to /run/host/notify as documented in the container interface. Since we don't guarantee a stable location for the notify socket and insist users use $NOTIFY_SOCKET to get its path, this is safe to do. --- diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 2c69dda36df..4ef7a28b114 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -1796,6 +1796,7 @@ static int build_environment( dev_t journal_stream_dev, ino_t journal_stream_ino, const char *memory_pressure_path, + bool needs_sandboxing, char ***ret) { _cleanup_strv_free_ char **our_env = NULL; @@ -1807,7 +1808,7 @@ static int build_environment( assert(p); assert(ret); -#define N_ENV_VARS 19 +#define N_ENV_VARS 20 our_env = new0(char*, N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX); if (!our_env) return -ENOMEM; @@ -2044,6 +2045,14 @@ static int build_environment( } } + if (p->notify_socket) { + x = strjoin("NOTIFY_SOCKET=", exec_get_private_notify_socket_path(c, p, needs_sandboxing) ?: p->notify_socket); + if (!x) + return -ENOMEM; + + our_env[n_env++] = x; + } + assert(n_env < N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX); #undef N_ENV_VARS @@ -3407,7 +3416,8 @@ static int apply_mount_namespace( .propagate_dir = propagate_dir, .incoming_dir = incoming_dir, .private_namespace_dir = private_namespace_dir, - .host_notify_socket = root_dir || root_image ? params->notify_socket : NULL, + .host_notify_socket = params->notify_socket, + .notify_socket_path = exec_get_private_notify_socket_path(context, params, needs_sandboxing), .host_os_release_stage = host_os_release_stage, /* If DynamicUser=no and RootDirectory= is set then lets pass a relaxed sandbox info, @@ -4847,6 +4857,7 @@ int exec_invoke( journal_stream_dev, journal_stream_ino, memory_pressure_path, + needs_sandboxing, &our_env); if (r < 0) { *exit_status = EXIT_MEMORY; diff --git a/src/core/execute.c b/src/core/execute.c index 40ab0ad1c53..a01096a7000 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -346,6 +346,28 @@ bool exec_needs_mount_namespace( return false; } +const char* exec_get_private_notify_socket_path(const ExecContext *context, const ExecParameters *params, bool needs_sandboxing) { + assert(context); + assert(params); + + if (!params->notify_socket) + return NULL; + + if (!needs_sandboxing) + return NULL; + + if (!context->root_directory && !context->root_image) + return NULL; + + if (!exec_context_get_effective_mount_apivfs(context)) + return NULL; + + if (!FLAGS_SET(params->flags, EXEC_APPLY_CHROOT)) + return NULL; + + return "/run/host/notify"; +} + bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) { assert(context); diff --git a/src/core/execute.h b/src/core/execute.h index 63a56a900cb..f1b94e7f4c0 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -632,6 +632,7 @@ ProtectControlGroups exec_get_protect_control_groups(const ExecContext *context, bool exec_needs_cgroup_namespace(const ExecContext *context, const ExecParameters *params); bool exec_needs_cgroup_mount(const ExecContext *context, const ExecParameters *params); bool exec_is_cgroup_mount_read_only(const ExecContext *context, const ExecParameters *params); +const char* exec_get_private_notify_socket_path(const ExecContext *context, const ExecParameters *params, bool needs_sandboxing); /* These logging macros do the same logging as those in unit.h, but using ExecContext and ExecParameters * instead of the unit object, so that it can be used in the sd-executor context (where the unit object is diff --git a/src/core/namespace.c b/src/core/namespace.c index 47b0314485c..72f31c7a010 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -2752,13 +2752,13 @@ int setup_namespace(const NamespaceParameters *p, char **reterr_path) { }; } - if (p->host_notify_socket) { + if (p->notify_socket_path) { MountEntry *me = mount_list_extend(&ml); if (!me) return log_oom_debug(); *me = (MountEntry) { - .path_const = p->host_notify_socket, + .path_const = p->notify_socket_path, .source_const = p->host_notify_socket, .mode = MOUNT_BIND, .read_only = true, diff --git a/src/core/namespace.h b/src/core/namespace.h index 02b0f775632..66651a8e794 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -174,6 +174,7 @@ struct NamespaceParameters { const char *private_namespace_dir; const char *host_notify_socket; + const char *notify_socket_path; const char *host_os_release_stage; bool ignore_protect_paths; diff --git a/src/core/service.c b/src/core/service.c index c7c5225115c..e5d23f87dd0 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1769,14 +1769,11 @@ static int service_spawn_internal( if (r < 0) return r; - our_env = new0(char*, 14); + our_env = new0(char*, 13); if (!our_env) return -ENOMEM; if (service_exec_needs_notify_socket(s, exec_params.flags)) { - if (asprintf(our_env + n_env++, "NOTIFY_SOCKET=%s", UNIT(s)->manager->notify_socket) < 0) - return -ENOMEM; - exec_params.notify_socket = UNIT(s)->manager->notify_socket; if (s->n_fd_store_max > 0) diff --git a/test/units/TEST-50-DISSECT.dissect.sh b/test/units/TEST-50-DISSECT.dissect.sh index 6cf12135517..60cbe9bc980 100755 --- a/test/units/TEST-50-DISSECT.dissect.sh +++ b/test/units/TEST-50-DISSECT.dissect.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: LGPL-2.1-or-later # -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- # ex: ts=8 sw=4 sts=4 et filetype=sh -# shellcheck disable=SC2233,SC2235 +# shellcheck disable=SC2233,SC2235,SC2016 set -eux set -o pipefail @@ -79,6 +79,11 @@ systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" mountpoint /run/systemd/jou (! systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" -p BindLogSockets=no ls /run/systemd/journal/socket) (! systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" -p MountAPIVFS=no ls /run/systemd/journal/socket) +# Test that the notify socket is bind mounted to /run/host/notify in sandboxed environments and +# $NOTIFY_SOCKET is set correctly. +systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" -p NotifyAccess=all --service-type=notify --pipe sh -c 'echo READY=1 | ncat --unixsock --udp $NOTIFY_SOCKET --source /run/notify && ls /run/host/notify' +systemd-run --wait -p RootImage="$MINIMAL_IMAGE.raw" -p NotifyAccess=all --service-type=notify --pipe sh -c 'echo READY=1 | ncat --unixsock --udp $NOTIFY_SOCKET --source /run/notify && env' | grep NOTIFY_SOCKET=/run/host/notify + systemd-run -P -p RootImage="$MINIMAL_IMAGE.raw" cat /usr/lib/os-release | grep -q -F "MARKER=1" mv "$MINIMAL_IMAGE.verity" "$MINIMAL_IMAGE.fooverity" mv "$MINIMAL_IMAGE.roothash" "$MINIMAL_IMAGE.foohash"