]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core/service: introduce sd_notify() RESTART_RESET=1 for resetting restart counter
authorMike Yuan <me@yhndnzj.com>
Fri, 25 Oct 2024 23:51:04 +0000 (01:51 +0200)
committerMike Yuan <me@yhndnzj.com>
Mon, 10 Mar 2025 23:44:44 +0000 (00:44 +0100)
We have RestartMaxDelaySec= + RestartSteps= to exponentially increase
auto restart durations, but it currently cannot be reset by the service
itself, which makes it sometimes awkward to use. A typical pattern
in real life is that a service was once down (e.g. due to temporary
network interruption) and multiple restarts were attempted. Then,
future restarts would always wait for an increased amount of time based on
RestartMaxDelaySec=, even after the original problem got resolved.
Such "persistence" could result in longer unavailability than there
should be for failures that come later.
(Cf. https://utcc.utoronto.ca/~cks/space/blog/linux/SystemdResettingUnitBackoff)

Let's introduce a new sd_notify() notification for resetting the restart
counter. There were discussions about making this timer-based, but I think
it's more flexible to leave the decision-making to the service. This enables
it to use, for instance, a combination of N successful requests + an uptime check.

man/sd_notify.xml
src/core/service.c

index c017f484870f4b7167917250fc93ca2ae173960e..746789e955e6d5169d5d1e14868d2811022f9920 100644 (file)
         <citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
         for information how to enable this functionality and
         <citerefentry><refentrytitle>sd_watchdog_enabled</refentrytitle><manvolnum>3</manvolnum></citerefentry>
-        for the details of how the service can check whether the watchdog is enabled. </para></listitem>
+        for the details of how the service can check whether the watchdog is enabled.</para></listitem>
       </varlistentry>
 
       <varlistentry>
         in time. Note that <varname>WatchdogSec=</varname> does not need to be enabled for
         <literal>WATCHDOG=trigger</literal> to trigger the watchdog action. See
         <citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>
-        for information about the watchdog behavior. </para>
+        for information about the watchdog behavior.</para>
 
         <xi:include href="version-info.xml" xpointer="v243"/></listitem>
       </varlistentry>
         <xi:include href="version-info.xml" xpointer="v236"/></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term>RESTART_RESET=1</term>
+
+        <listitem><para>Reset the restart counter of the service, which has the effect of restoring
+        the restart duration to <varname>RestartSec=</varname> if <varname>RestartSteps=</varname> and
+        <varname>RestartMaxDelaySec=</varname> are in use. For more information, refer to
+        <citerefentry><refentrytitle>systemd.service</refentrytitle><manvolnum>5</manvolnum></citerefentry>.
+        </para>
+
+        <xi:include href="version-info.xml" xpointer="v258"/></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term>FDSTORE=1</term>
 
index 4bad026537dd8f1f3d470031b25f69be0856b2e1..ccfa439dd0afe18b1940d27d6df25c00d283fc4d 100644 (file)
@@ -4861,6 +4861,17 @@ static void service_notify_message(
                         service_override_watchdog_timeout(s, watchdog_override_usec);
         }
 
+        /* Interpret RESTART_RESET=1 */
+        if (strv_contains(tags, "RESTART_RESET=1") && IN_SET(s->state, SERVICE_RUNNING, SERVICE_STOP)) {
+                log_unit_struct(u, LOG_NOTICE,
+                                LOG_UNIT_MESSAGE(u, "Got RESTART_RESET=1, resetting restart counter from %u.", s->n_restarts),
+                                "N_RESTARTS=0",
+                                LOG_UNIT_INVOCATION_ID(u));
+
+                s->n_restarts = 0;
+                notify_dbus = true;
+        }
+
         /* Process FD store messages. Either FDSTOREREMOVE=1 for removal, or FDSTORE=1 for addition. In both cases,
          * process FDNAME= for picking the file descriptor name to use. Note that FDNAME= is required when removing
          * fds, but optional when pushing in new fds, for compatibility reasons. */