]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: introduce a new job mode JOB_RESTART_DEPENDENCIES
authorLennart Poettering <lennart@poettering.net>
Fri, 30 Jun 2023 16:17:06 +0000 (18:17 +0200)
committerLennart Poettering <lennart@poettering.net>
Mon, 3 Jul 2023 15:31:25 +0000 (17:31 +0200)
This new job mode will enqueue a start job for a unit, and all units
depending on the unit will get a restart job enqueued. This is then used
for automatic sevice restarts: the unit itself is only started, the
depending units restarted. This way the unit will not go down
unnecessarily, triggering OnSuccess= needlessly.

This also introduces a new state SERVICE_AUTO_RESTART_QUEUED that is
entered once the restart jobs are enqueued. Previously we'd stay in
SERVICE_AUTO_RESTART, but that's problematic, since we'd lose
information whether we still need to enqueue the restart job during a
serialization/deserialization cycle or not. By having an explicit state
for this we know exactly whether we still need to enqueue the job or
not. It's also good since when we are in SERVICE_AUTO_RESTART_QUEUED we
want to act on unit_start(), but on SERVICE_AUTO_RESTART we want to wait
for the holdoff time to pass before we act on unit_start().

Fixes: #27722
src/basic/unit-def.c
src/basic/unit-def.h
src/core/job.c
src/core/job.h
src/core/manager.c
src/core/service.c
src/core/transaction.c
src/core/transaction.h

index bfd748ada4aa0d7b194527b468cfdec5eb846995..595e4414c7e0a04e57ac53f503df70dfb95fd3b4 100644 (file)
@@ -204,6 +204,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = {
         [SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart",
         [SERVICE_DEAD_RESOURCES_PINNED]      = "dead-resources-pinned",
         [SERVICE_AUTO_RESTART]               = "auto-restart",
+        [SERVICE_AUTO_RESTART_QUEUED]        = "auto-restart-queued",
         [SERVICE_CLEANING]                   = "cleaning",
 };
 
index 7bd86cb1d2ff4af2ec9215ffc90c7091c2ac0cb5..f67fc1d620d8e04af7efbd21dcff9a49fcf3b692 100644 (file)
@@ -149,6 +149,7 @@ typedef enum ServiceState {
         SERVICE_FAILED_BEFORE_AUTO_RESTART,
         SERVICE_DEAD_RESOURCES_PINNED,  /* Like SERVICE_DEAD, but with pinned resources */
         SERVICE_AUTO_RESTART,
+        SERVICE_AUTO_RESTART_QUEUED,
         SERVICE_CLEANING,
         _SERVICE_STATE_MAX,
         _SERVICE_STATE_INVALID = -EINVAL,
index 50f9581d727cd5657b851bf73dac99f4ffada2b5..3d5e4e42d126ae064d865ee79c0be28f53eb54a3 100644 (file)
@@ -1634,6 +1634,7 @@ static const char* const job_mode_table[_JOB_MODE_MAX] = {
         [JOB_IGNORE_DEPENDENCIES]  = "ignore-dependencies",
         [JOB_IGNORE_REQUIREMENTS]  = "ignore-requirements",
         [JOB_TRIGGERING]           = "triggering",
+        [JOB_RESTART_DEPENDENCIES] = "restart-dependencies",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(job_mode, JobMode);
index f3e0b93fed5a6e308f9880cbe43d7d83b5cc3b95..d3b98d98b6111435d17383e091633c794289d929 100644 (file)
@@ -79,6 +79,7 @@ enum JobMode {
         JOB_IGNORE_DEPENDENCIES, /* Ignore both requirement and ordering dependencies */
         JOB_IGNORE_REQUIREMENTS, /* Ignore requirement dependencies */
         JOB_TRIGGERING,          /* Adds TRIGGERED_BY dependencies to the same transaction */
+        JOB_RESTART_DEPENDENCIES,/* A "start" job for the specified unit becomes "restart" for depending units */
         _JOB_MODE_MAX,
         _JOB_MODE_INVALID = -EINVAL,
 };
index f4591002944d5225ca7747dbbfd75cca5fc05d20..cc4fc1679c2c0fe7e8a9bbf7c494b6fae64e1d04 100644 (file)
@@ -2044,6 +2044,9 @@ int manager_add_job(
         if (mode == JOB_TRIGGERING && type != JOB_STOP)
                 return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=triggering is only valid for stop.");
 
+        if (mode == JOB_RESTART_DEPENDENCIES && type != JOB_START)
+                return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "--job-mode=restart-dependencies is only valid for start.");
+
         log_unit_debug(unit, "Trying to enqueue job %s/%s/%s", unit->id, job_type_to_string(type), job_mode_to_string(mode));
 
         type = job_type_collapse(type, unit);
@@ -2059,7 +2062,8 @@ int manager_add_job(
                         /* by= */ NULL,
                         TRANSACTION_MATTERS |
                         (IN_SET(mode, JOB_IGNORE_DEPENDENCIES, JOB_IGNORE_REQUIREMENTS) ? TRANSACTION_IGNORE_REQUIREMENTS : 0) |
-                        (mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0),
+                        (mode == JOB_IGNORE_DEPENDENCIES ? TRANSACTION_IGNORE_ORDER : 0) |
+                        (mode == JOB_RESTART_DEPENDENCIES ? TRANSACTION_PROPAGATE_START_AS_RESTART : 0),
                         error);
         if (r < 0)
                 return r;
index 7f6ee26209d2b8d6330817b9fa4d48ff1a14c202..0759838e3b9b699b92e56e70defdb30dc18c6787 100644 (file)
@@ -72,6 +72,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
         [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
         [SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
         [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
+        [SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
         [SERVICE_CLEANING] = UNIT_MAINTENANCE,
 };
 
@@ -101,6 +102,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] =
         [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED,
         [SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE,
         [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING,
+        [SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING,
         [SERVICE_CLEANING] = UNIT_MAINTENANCE,
 };
 
@@ -1258,7 +1260,7 @@ static void service_set_state(Service *s, ServiceState state) {
 
         if (IN_SET(state,
                    SERVICE_DEAD, SERVICE_FAILED,
-                   SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
+                   SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
                    SERVICE_DEAD_RESOURCES_PINNED)) {
                 unit_unwatch_all_pids(UNIT(s));
                 unit_dequeue_rewatch_pids(UNIT(s));
@@ -1363,7 +1365,7 @@ static int service_coldplug(Unit *u) {
 
         if (!IN_SET(s->deserialized_state,
                     SERVICE_DEAD, SERVICE_FAILED,
-                    SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART,
+                    SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED,
                     SERVICE_CLEANING,
                     SERVICE_DEAD_RESOURCES_PINNED)) {
                 (void) unit_enqueue_rewatch_pids(u);
@@ -1945,7 +1947,7 @@ static bool service_will_restart(Unit *u) {
 
         assert(s);
 
-        if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART))
+        if (IN_SET(s->state, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, SERVICE_AUTO_RESTART_QUEUED))
                 return true;
 
         return unit_will_restart_default(u);
@@ -2511,11 +2513,9 @@ static void service_enter_restart(Service *s) {
                 return;
         }
 
-        /* Any units that are bound to this service must also be
-         * restarted. We use JOB_RESTART (instead of the more obvious
-         * JOB_START) here so that those dependency jobs will be added
-         * as well. */
-        r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_REPLACE, NULL, &error, NULL);
+        /* Any units that are bound to this service must also be restarted. We use JOB_START for ourselves
+         * but then set JOB_RESTART_DEPENDENCIES which will enqueue JOB_RESTART for those dependency jobs. */
+        r = manager_add_job(UNIT(s)->manager, JOB_START, UNIT(s), JOB_RESTART_DEPENDENCIES, NULL, &error, NULL);
         if (r < 0)
                 goto fail;
 
@@ -2534,11 +2534,10 @@ static void service_enter_restart(Service *s) {
                                          "Scheduled restart job, restart counter is at %u.", s->n_restarts),
                         "N_RESTARTS=%u", s->n_restarts);
 
+        service_set_state(s, SERVICE_AUTO_RESTART_QUEUED);
+
         /* Notify clients about changed restart counter */
         unit_add_to_dbus_queue(UNIT(s));
-
-        /* Note that we stay in the SERVICE_AUTO_RESTART state here, it will be canceled as part of the
-         * service_stop() call that is executed as part of JOB_RESTART. */
         return;
 
 fail:
@@ -2717,7 +2716,7 @@ static int service_start(Unit *u) {
         if (IN_SET(s->state, SERVICE_AUTO_RESTART, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART))
                 return -EAGAIN;
 
-        assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED));
+        assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED, SERVICE_AUTO_RESTART_QUEUED));
 
         r = unit_acquire_invocation_id(u);
         if (r < 0)
@@ -2775,7 +2774,8 @@ static int service_stop(Unit *u) {
                 return 0;
 
         case SERVICE_AUTO_RESTART:
-                /* A restart will be scheduled or is in progress. */
+        case SERVICE_AUTO_RESTART_QUEUED:
+                /* Give up on the auto restart */
                 service_set_state(s, service_determine_dead_state(s));
                 return 0;
 
@@ -3648,6 +3648,7 @@ static void service_notify_cgroup_empty_event(Unit *u) {
         /* If the cgroup empty notification comes when the unit is not active, we must have failed to clean
          * up the cgroup earlier and should do it now. */
         case SERVICE_AUTO_RESTART:
+        case SERVICE_AUTO_RESTART_QUEUED:
                 unit_prune_cgroup(u);
                 break;
 
index 9c1523c47ad2824357a64ae1ccd70f231d27ad2b..68614de7a063e751548898adcd452c24a9f9f42b 100644 (file)
@@ -1054,34 +1054,38 @@ int transaction_add_job_and_dependencies(
                         }
                 }
 
-                if (IN_SET(type, JOB_STOP, JOB_RESTART)) {
-                        _cleanup_set_free_ Set *propagated_restart = NULL;
-                        /* We propagate STOP as STOP, but RESTART only as TRY_RESTART, in order not to start
-                         * dependencies that are not around. */
+                _cleanup_set_free_ Set *propagated_restart = NULL;
 
-                        if (type == JOB_RESTART)
-                                UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) {
-                                        JobType nt;
+                if (type == JOB_RESTART || (type == JOB_START && FLAGS_SET(flags, TRANSACTION_PROPAGATE_START_AS_RESTART))) {
 
-                                        r = set_ensure_put(&propagated_restart, NULL, dep);
-                                        if (r < 0)
-                                                return r;
+                        /* We propagate RESTART only as TRY_RESTART, in order not to start dependencies that
+                         * are not around. */
+
+                        UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_RESTART) {
+                                JobType nt;
 
-                                        nt = job_type_collapse(JOB_TRY_RESTART, dep);
-                                        if (nt == JOB_NOP)
-                                                continue;
+                                r = set_ensure_put(&propagated_restart, NULL, dep);
+                                if (r < 0)
+                                        return r;
 
-                                        r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e);
-                                        if (r < 0) {
-                                                if (r != -EBADR) /* job type not applicable */
-                                                        return r;
+                                nt = job_type_collapse(JOB_TRY_RESTART, dep);
+                                if (nt == JOB_NOP)
+                                        continue;
 
-                                                sd_bus_error_free(e);
-                                        }
+                                r = transaction_add_job_and_dependencies(tr, nt, dep, ret, TRANSACTION_MATTERS | (flags & TRANSACTION_IGNORE_ORDER), e);
+                                if (r < 0) {
+                                        if (r != -EBADR) /* job type not applicable */
+                                                return r;
+
+                                        sd_bus_error_free(e);
                                 }
+                        }
+                }
 
+                if (type == JOB_STOP) {
                         /* The 'stop' part of a restart job is also propagated to units with
                          * UNIT_ATOM_PROPAGATE_STOP */
+
                         UNIT_FOREACH_DEPENDENCY(dep, ret->unit, UNIT_ATOM_PROPAGATE_STOP) {
                                 /* Units experienced restart propagation are skipped */
                                 if (set_contains(propagated_restart, dep))
index 5874077aefdc4b3981a434101859190a7249dd69..db2d0566499a2f18cb635802286454c57a65e369 100644 (file)
@@ -21,10 +21,13 @@ Transaction *transaction_abort_and_free(Transaction *tr);
 DEFINE_TRIVIAL_CLEANUP_FUNC(Transaction*, transaction_abort_and_free);
 
 typedef enum TransactionAddFlags {
-        TRANSACTION_MATTERS             = 1 << 0,
-        TRANSACTION_CONFLICTS           = 1 << 1,
-        TRANSACTION_IGNORE_REQUIREMENTS = 1 << 2,
-        TRANSACTION_IGNORE_ORDER        = 1 << 3,
+        TRANSACTION_MATTERS                    = 1 << 0,
+        TRANSACTION_CONFLICTS                  = 1 << 1,
+        TRANSACTION_IGNORE_REQUIREMENTS        = 1 << 2,
+        TRANSACTION_IGNORE_ORDER               = 1 << 3,
+
+        /* Propagate a START job to other units like a RESTART */
+        TRANSACTION_PROPAGATE_START_AS_RESTART = 1 << 4,
 } TransactionAddFlags;
 
 void transaction_add_propagate_reload_jobs(