From: Łukasz Stelmach Date: Mon, 20 May 2024 14:51:55 +0000 (+0200) Subject: core: drop ambient capabilities in systemd-executor X-Git-Tag: v257-rc1~791^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e0ebc81b2d194206c519375394bd67baa19e67ce;p=thirdparty%2Fsystemd.git core: drop ambient capabilities in systemd-executor Since the commit 963b6b906e ("core: drop ambient capabilities in user manager") systemd running as the session manager has dropped ambient capabilities retaining other sets allowing user services to be started with elevated capabilities. This worked fine until the introduction of sd-executor. For a non-root process to be started with elevated capabilities by a non-root parent it either needs file capabilities or ambient capabilities in the parent process. Thus, systemd needs to allow sd-executor to inherit its ambient capabilities and sd-executor should drop them as systemd did before. The ambient set is managed for both system and session managers, but with the default set for PID#1 being empty, this code does not affect operation of PID#1. Fixes: bb5232b6a3 ("core: add systemd-executor binary") --- diff --git a/src/core/execute.c b/src/core/execute.c index cdc12779564..8a8e108b79f 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -466,6 +466,12 @@ int exec_spawn( _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; dual_timestamp start_timestamp; + /* Restore the original ambient capability set the manager was started with to pass it to + * sd-executor. */ + r = capability_ambient_set_apply(unit->manager->original_ambient_set, /* also_inherit= */ false); + if (r < 0) + return log_unit_error_errno(unit, r, "Failed to apply the starting ambient set: %m"); + /* Record the start timestamp before we fork so that it is guaranteed to be earlier than the * handoff timestamp. */ dual_timestamp_now(&start_timestamp); @@ -480,6 +486,10 @@ int exec_spawn( environ, cg_unified() > 0 ? 
subcgroup_path : NULL, &pidref); + + /* Drop the ambient set again, so no processes other than sd-executor spawned from the manager inherit it. */ + (void) capability_ambient_set_apply(0, /* also_inherit= */ false); + if (r == -EUCLEAN && subcgroup_path) return log_unit_error_errno(unit, r, "Failed to spawn process into cgroup '%s', because the cgroup " diff --git a/src/core/executor.c b/src/core/executor.c index bd0c742533a..acd0872d23e 100644 --- a/src/core/executor.c +++ b/src/core/executor.c @@ -8,6 +8,7 @@ #include "alloc-util.h" #include "argv-util.h" #include "build.h" +#include "capability-util.h" #include "exec-invoke.h" #include "execute-serialize.h" #include "execute.h" @@ -206,6 +207,11 @@ static int run(int argc, char *argv[]) { log_set_prohibit_ipc(false); log_open(); + /* Clear ambient capabilities, so services do not inherit them implicitly. Dropping them does + * not affect the permitted and effective sets which are important for the executor itself to + * operate. */ + capability_ambient_set_apply(0, /* also_inherit= */ false); + /* This call would collect all passed fds and enable CLOEXEC. We'll unset it in exec_invoke (flag_fds) * for fds that shall be passed to the child. * The serialization fd is set to CLOEXEC in parse_argv, so it's also filtered. 
*/ diff --git a/src/core/main.c b/src/core/main.c index 6234eff7e54..401e4b5b946 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -1937,6 +1937,7 @@ static int do_reexecute( FDSet *fds, const char *switch_root_dir, const char *switch_root_init, + uint64_t capability_ambient_set, const char **ret_error_message) { size_t i, args_size; @@ -1998,6 +1999,10 @@ static int do_reexecute( log_error_errno(r, "Failed to switch root, trying to continue: %m"); } + r = capability_ambient_set_apply(capability_ambient_set, /* also_inherit= */ false); + if (r < 0) + log_error_errno(r, "Failed to apply the starting ambient set, ignoring: %m."); + args_size = argc + 5; args = newa(const char*, args_size); @@ -2373,6 +2378,7 @@ static int initialize_runtime( bool first_boot, struct rlimit *saved_rlimit_nofile, struct rlimit *saved_rlimit_memlock, + uint64_t *original_ambient_set, const char **ret_error_message) { int r; @@ -2487,11 +2493,6 @@ static int initialize_runtime( log_warning_errno(r, "Failed to copy os-release for propagation, ignoring: %m"); } - /* Clear ambient capabilities, so services do not inherit them implicitly. Dropping them does - * not affect the permitted and effective sets which are important for the manager itself to - * operate. */ - (void) capability_ambient_set_apply(0, /* also_inherit= */ false); - break; } @@ -2499,6 +2500,19 @@ static int initialize_runtime( assert_not_reached(); } + /* The two operations on the ambient set are meant for a user session manager. They do not affect + * system manager operation, because by default it starts with an empty ambient set. + * + * Preserve the ambient set for later use with sd-executor processes. */ + r = capability_get_ambient(original_ambient_set); + if (r < 0) + log_error_errno(r, "Failed to save ambient capabilities, ignoring: %m."); + + /* Clear ambient capabilities, so services do not inherit them implicitly. 
Dropping them does + * not affect the permitted and effective sets which are important for the manager itself to + * operate. */ + (void) capability_ambient_set_apply(0, /* also_inherit= */ false); + if (arg_timer_slack_nsec != NSEC_INFINITY) if (prctl(PR_SET_TIMERSLACK, arg_timer_slack_nsec) < 0) log_warning_errno(errno, "Failed to adjust timer slack, ignoring: %m"); @@ -2995,6 +3009,7 @@ int main(int argc, char *argv[]) { usec_t before_startup, after_startup; static char systemd[] = "systemd"; const char *error_message = NULL; + uint64_t original_ambient_set; int r, retval = EXIT_FAILURE; Manager *m = NULL; FDSet *fds = NULL; @@ -3269,6 +3284,7 @@ int main(int argc, char *argv[]) { first_boot, &saved_rlimit_nofile, &saved_rlimit_memlock, + &original_ambient_set, &error_message); if (r < 0) goto finish; @@ -3290,6 +3306,8 @@ int main(int argc, char *argv[]) { m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_START)] = security_start_timestamp; m->timestamps[manager_timestamp_initrd_mangle(MANAGER_TIMESTAMP_SECURITY_FINISH)] = security_finish_timestamp; + m->original_ambient_set = original_ambient_set; + set_manager_defaults(m); set_manager_settings(m); manager_set_first_boot(m, first_boot); @@ -3365,6 +3383,7 @@ finish: fds, switch_root_dir, switch_root_init, + original_ambient_set, &error_message); /* This only returns if reexecution failed */ arg_serialization = safe_fclose(arg_serialization); diff --git a/src/core/manager.h b/src/core/manager.h index 2dfaf33dd06..908a148196e 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -511,6 +511,9 @@ struct Manager { int executor_fd; unsigned soft_reboots_count; + + /* Original ambient capabilities when we were initialized */ + uint64_t original_ambient_set; }; static inline usec_t manager_default_timeout_abort_usec(Manager *m) {