From: Adrian Vovk Date: Wed, 24 Jan 2024 00:50:21 +0000 (-0500) Subject: core: Fail to start/stop/reload unit if frozen X-Git-Tag: v256-rc1~1008^2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F31039%2Fhead;p=thirdparty%2Fsystemd.git core: Fail to start/stop/reload unit if frozen Previously, unit_{start,stop,reload} would call the low-level cgroup unfreeze function whenever a unit was started, stopped, or reloaded. It did so with no error checking. This call would ultimately recurse up the cgroup tree, and unfreeze all the parent cgroups of the unit, unless an error occurred (in which case I have no idea what would happen...) After the freeze/thaw rework in a previous commit, this can no longer work. If we recursively thaw the parent cgroups of the unit, there may be sibling units marked as PARENT_FROZEN which will no longer actually have frozen parents. Fixing this is a lot more complicated than simply disallowing start/stop/reload on a frozen unit Fixes https://github.com/systemd/systemd/issues/15849 --- diff --git a/src/core/job.c b/src/core/job.c index e78c2a70db6..595fbce93ae 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -633,16 +633,19 @@ static const char* job_done_message_format(Unit *u, JobType t, JobResult result) [JOB_UNSUPPORTED] = "Starting of %s unsupported.", [JOB_COLLECTED] = "Unnecessary job was removed for %s.", [JOB_ONCE] = "Unit %s has been started before and cannot be started again.", + [JOB_FROZEN] = "Cannot start frozen unit %s.", }; static const char* const generic_finished_stop_job[_JOB_RESULT_MAX] = { [JOB_DONE] = "Stopped %s.", [JOB_FAILED] = "Stopped %s with error.", [JOB_TIMEOUT] = "Timed out stopping %s.", + [JOB_FROZEN] = "Cannot stop frozen unit %s.", }; static const char* const generic_finished_reload_job[_JOB_RESULT_MAX] = { [JOB_DONE] = "Reloaded %s.", [JOB_FAILED] = "Reload failed for %s.", [JOB_TIMEOUT] = "Timed out reloading %s.", + [JOB_FROZEN] = "Cannot reload frozen unit %s.", }; /* When verify-active detects the unit is inactive, report it. * Most likely a DEPEND warning from a requisiting unit will @@ -704,6 +707,7 @@ static const struct { [JOB_UNSUPPORTED] = { LOG_WARNING, ANSI_HIGHLIGHT_YELLOW, "UNSUPP" }, [JOB_COLLECTED] = { LOG_INFO, }, [JOB_ONCE] = { LOG_ERR, ANSI_HIGHLIGHT_RED, " ONCE " }, + [JOB_FROZEN] = { LOG_ERR, ANSI_HIGHLIGHT_RED, "FROZEN" }, }; static const char* job_done_mid(JobType type, JobResult result) { @@ -954,6 +958,8 @@ int job_run_and_invalidate(Job *j) { r = job_finish_and_invalidate(j, JOB_DEPENDENCY, true, false); else if (r == -ESTALE) r = job_finish_and_invalidate(j, JOB_ONCE, true, false); + else if (r == -EDEADLK) + r = job_finish_and_invalidate(j, JOB_FROZEN, true, false); else if (r < 0) r = job_finish_and_invalidate(j, JOB_FAILED, true, false); } @@ -1011,7 +1017,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr goto finish; } - if (IN_SET(result, JOB_FAILED, JOB_INVALID)) + if (IN_SET(result, JOB_FAILED, JOB_INVALID, JOB_FROZEN)) j->manager->n_failed_jobs++; job_uninstall(j); @@ -1645,6 +1651,7 @@ static const char* const job_result_table[_JOB_RESULT_MAX] = { [JOB_UNSUPPORTED] = "unsupported", [JOB_COLLECTED] = "collected", [JOB_ONCE] = "once", + [JOB_FROZEN] = "frozen", }; DEFINE_STRING_TABLE_LOOKUP(job_result, JobResult); diff --git a/src/core/job.h b/src/core/job.h index 891d87a79cd..8318b524b44 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -96,6 +96,7 @@ enum JobResult { JOB_UNSUPPORTED, /* Couldn't start a unit, because the unit type is not supported on the system */ JOB_COLLECTED, /* Job was garbage collected, since nothing needed it anymore */ JOB_ONCE, /* Unit was started before, and hence can't be started again */ + JOB_FROZEN, /* Unit is currently frozen, so we can't safely operate on it */ _JOB_RESULT_MAX, _JOB_RESULT_INVALID = -EINVAL, }; diff --git a/src/core/unit.c b/src/core/unit.c index b4634e0a86a..6f61a2285e3 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -1956,6 +1956,10 @@ int unit_start(Unit *u, ActivationDetails *details) { return unit_start(following, details); } + /* Check to make sure the unit isn't frozen */ + if (u->freezer_state != FREEZER_RUNNING) + return -EDEADLK; + /* Check our ability to start early so that failure conditions don't cause us to enter a busy loop. */ if (UNIT_VTABLE(u)->can_start) { r = UNIT_VTABLE(u)->can_start(u); @@ -1972,7 +1976,6 @@ int unit_start(Unit *u, ActivationDetails *details) { * waits for a holdoff timer to elapse before it will start again. */ unit_add_to_dbus_queue(u); - unit_cgroup_freezer_action(u, FREEZER_THAW); if (!u->activation_details) /* Older details object wins */ u->activation_details = activation_details_ref(details); @@ -2007,6 +2010,7 @@ bool unit_can_isolate(Unit *u) { * -EBADR: This unit type does not support stopping. * -EALREADY: Unit is already stopped. * -EAGAIN: An operation is already in progress. Retry later. + * -EDEADLK: Unit is frozen */ int unit_stop(Unit *u) { UnitActiveState state; @@ -2024,11 +2028,14 @@ int unit_stop(Unit *u) { return unit_stop(following); } + /* Check to make sure the unit isn't frozen */ + if (u->freezer_state != FREEZER_RUNNING) + return -EDEADLK; + if (!UNIT_VTABLE(u)->stop) return -EBADR; unit_add_to_dbus_queue(u); - unit_cgroup_freezer_action(u, FREEZER_THAW); return UNIT_VTABLE(u)->stop(u); } @@ -2053,6 +2060,7 @@ bool unit_can_stop(Unit *u) { * -EBADR: This unit type does not support reloading. * -ENOEXEC: Unit is not started. * -EAGAIN: An operation is already in progress. Retry later. + * -EDEADLK: Unit is frozen. */ int unit_reload(Unit *u) { UnitActiveState state; @@ -2079,6 +2087,10 @@ int unit_reload(Unit *u) { return unit_reload(following); } + /* Check to make sure the unit isn't frozen */ + if (u->freezer_state != FREEZER_RUNNING) + return -EDEADLK; + unit_add_to_dbus_queue(u); if (!UNIT_VTABLE(u)->reload) { @@ -2087,8 +2099,6 @@ int unit_reload(Unit *u) { return 0; } - unit_cgroup_freezer_action(u, FREEZER_THAW); - return UNIT_VTABLE(u)->reload(u); } diff --git a/src/shared/bus-wait-for-jobs.c b/src/shared/bus-wait-for-jobs.c index 70f2043347f..2930552be48 100644 --- a/src/shared/bus-wait-for-jobs.c +++ b/src/shared/bus-wait-for-jobs.c @@ -248,6 +248,8 @@ static int check_wait_response(BusWaitForJobs *d, WaitJobsFlags flags, const cha log_error("Queued job for %s was garbage collected.", d->name); else if (streq(d->result, "once")) log_error("Unit %s was started already once and can't be started again.", d->name); + else if (streq(d->result, "frozen")) + log_error("Cannot perform operation on frozen unit %s.", d->name); else if (endswith(d->name, ".service")) { /* Job result is unknown. For services, let's also try Result property. */ _cleanup_free_ char *result = NULL; @@ -276,6 +278,8 @@ static int check_wait_response(BusWaitForJobs *d, WaitJobsFlags flags, const cha return -EOPNOTSUPP; else if (streq(d->result, "once")) return -ESTALE; + else if (streq(d->result, "frozen")) + return -EDEADLK; return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM), "Unexpected job result '%s' for unit '%s', assuming server side newer than us.", diff --git a/test/units/testsuite-38.sh b/test/units/testsuite-38.sh index 0737b9c199b..e557fa1dbf9 100755 --- a/test/units/testsuite-38.sh +++ b/test/units/testsuite-38.sh @@ -149,31 +149,6 @@ testcase_dbus_api() { echo } -testcase_jobs() { - local pid_before= - local pid_after= - echo "Test that it is possible to apply jobs on frozen units:" - - systemctl start "${unit}" - dbus_freeze "${unit}" - check_freezer_state "${unit}" "frozen" - - echo -n " - restart: " - pid_before=$(systemctl show -p MainPID "${unit}" --value) - systemctl restart "${unit}" - pid_after=$(systemctl show -p MainPID "${unit}" --value) - [ "$pid_before" != "$pid_after" ] && echo "[ OK ]" - - dbus_freeze "${unit}" - check_freezer_state "${unit}" "frozen" - - echo -n " - stop: " - timeout 5s systemctl stop "${unit}" - echo "[ OK ]" - - echo -} - testcase_systemctl() { echo "Test that systemctl freeze/thaw verbs:" @@ -321,6 +296,11 @@ testcase_recursive() { check_cgroup_state "$slice/$unit" 1 echo "[ OK ]" + echo -n " - can't stop a frozen unit: " + (! systemctl -q stop "$unit" ) + echo "[ OK ]" + systemctl thaw "$unit" + systemctl stop "$unit" systemctl stop "$slice" @@ -368,6 +348,7 @@ testcase_preserve_state() { check_freezer_state "$unit" "frozen" echo "[ OK ]" + systemctl thaw "$unit" systemctl stop "$unit" systemctl stop "$slice"