]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: introduce a restart counter (#6495)
author Lennart Poettering <lennart@poettering.net>
Wed, 9 Aug 2017 19:12:55 +0000 (21:12 +0200)
committer GitHub <noreply@github.com>
Wed, 9 Aug 2017 19:12:55 +0000 (21:12 +0200)
This adds a per-service restart counter. Each time an automatic
restart is scheduled (due to Restart=) it is increased by one. Its
current value is exposed over the bus as NRestarts=. It is also logged
(in a structured, recognizable way) on each restart.

Note that this really only counts automatic starts triggered by Restart=
(which it nicely complements). Manual restarts will reset the counter,
as will explicit calls to "systemctl reset-failed". It's supposed to be
a tool for measuring the automatic restart feature, and nothing else.

Fixes: #4126
src/core/dbus-service.c
src/core/service.c
src/core/service.h
src/systemd/sd-messages.h

index a20d4b3b99a3956145c40d358d72a436792b6db7..0b81d085fe5fe838730d416e1809952e39f9ca63 100644 (file)
@@ -67,6 +67,7 @@ const sd_bus_vtable bus_service_vtable[] = {
         SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("UID", "u", NULL, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_PROPERTY("GID", "u", NULL, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
+        SD_BUS_PROPERTY("NRestarts", "u", bus_property_get_unsigned, offsetof(Service, n_restarts), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
 
         BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION),
index 39fcdcc1a76bd929beb99ce10078ad32f5abc91c..e576f4ba83c877da9f6d235ab128eedfe617032d 100644 (file)
@@ -21,6 +21,8 @@
 #include <signal.h>
 #include <unistd.h>
 
+#include "sd-messages.h"
+
 #include "alloc-util.h"
 #include "async.h"
 #include "bus-error.h"
@@ -1514,7 +1516,10 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
                         goto fail;
 
                 service_set_state(s, SERVICE_AUTO_RESTART);
-        }
+        } else
+                /* If we shan't restart, then flush out the restart counter. But don't do that immediately, so that the
+                 * user can still introspect the counter. Do so on the next start. */
+                s->flush_n_restarts = true;
 
         /* The next restart might not be a manual stop, hence reset the flag indicating manual stops */
         s->forbid_restart = false;
@@ -1932,11 +1937,26 @@ static void service_enter_restart(Service *s) {
         if (r < 0)
                 goto fail;
 
+        /* Count the jobs we enqueue for restarting. This counter is maintained as long as the unit isn't fully
+         * stopped, i.e. as long as it remains up or remains in auto-restart states. The user can reset the counter
+         * explicitly however via the usual "systemctl reset-failed" logic. */
+        s->n_restarts ++;
+        s->flush_n_restarts = false;
+
+        log_struct(LOG_INFO,
+                   "MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
+                   LOG_UNIT_ID(UNIT(s)),
+                   LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
+                   "N_RESTARTS=%u", s->n_restarts,
+                   NULL);
+
+        /* Notify clients about changed restart counter */
+        unit_add_to_dbus_queue(UNIT(s));
+
         /* Note that we stay in the SERVICE_AUTO_RESTART state here,
          * it will be canceled as part of the service_stop() call that
          * is executed as part of JOB_RESTART. */
 
-        log_unit_debug(UNIT(s), "Scheduled restart job.");
         return;
 
 fail:
@@ -2119,6 +2139,12 @@ static int service_start(Unit *u) {
         s->watchdog_override_enable = false;
         s->watchdog_override_usec = 0;
 
+        /* This is not an automatic restart? Flush the restart counter then */
+        if (s->flush_n_restarts) {
+                s->n_restarts = 0;
+                s->flush_n_restarts = false;
+        }
+
         service_enter_start_pre(s);
         return 1;
 }
@@ -2271,6 +2297,9 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
         unit_serialize_item(u, f, "bus-name-good", yes_no(s->bus_name_good));
         unit_serialize_item(u, f, "bus-name-owner", s->bus_name_owner);
 
+        unit_serialize_item_format(u, f, "n-restarts", "%u", s->n_restarts); /* restart counter, parsed back as unsigned */
+        unit_serialize_item(u, f, "flush-n-restarts", yes_no(s->flush_n_restarts)); /* key must match service_deserialize_item() */
+
         r = unit_serialize_item_escaped(u, f, "status-text", s->status_text);
         if (r < 0)
                 return r;
@@ -2636,6 +2665,18 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                 r = service_deserialize_exec_command(u, key, value);
                 if (r < 0)
                         log_unit_debug_errno(u, r, "Failed to parse serialized command \"%s\": %m", value);
+
+        } else if (streq(key, "n-restarts")) {
+                r = safe_atou(value, &s->n_restarts);
+                if (r < 0)
+                        log_unit_debug_errno(u, r, "Failed to parse serialized restart counter '%s': %m", value);
+
+        } else if (streq(key, "flush-n-restarts")) {
+                r = parse_boolean(value);
+                if (r < 0)
+                        log_unit_debug_errno(u, r, "Failed to parse serialized flush restart counter setting '%s': %m", value);
+                else
+                        s->flush_n_restarts = r;
         } else
                 log_unit_debug(u, "Unknown serialization key: %s", key);
 
@@ -3548,6 +3589,8 @@ static void service_reset_failed(Unit *u) {
 
         s->result = SERVICE_SUCCESS;
         s->reload_result = SERVICE_SUCCESS;
+        s->n_restarts = 0;
+        s->flush_n_restarts = false;
 }
 
 static int service_kill(Unit *u, KillWho who, int signo, sd_bus_error *error) {
index f4ba604f69a0bf8cdf99796b5af354b14f542011..0ac8bc9a675ad591a67e6f9ee06879ce0c015e1c 100644 (file)
@@ -193,6 +193,9 @@ struct Service {
         int stdin_fd;
         int stdout_fd;
         int stderr_fd;
+
+        unsigned n_restarts;
+        bool flush_n_restarts;
 };
 
 extern const UnitVTable service_vtable;
index f466d9b0628eb25ec010550fa26c79b20351e649..4bc248a4b1668943db00fca6d58ab5ee671a9e7e 100644 (file)
@@ -99,6 +99,10 @@ _SD_BEGIN_DECLARATIONS;
 #define SD_MESSAGE_UNIT_RELOADED          SD_ID128_MAKE(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54)
 #define SD_MESSAGE_UNIT_RELOADED_STR      SD_ID128_MAKE_STR(7b,05,eb,c6,68,38,42,22,ba,a8,88,11,79,cf,da,54)
 
+#define SD_MESSAGE_UNIT_RESTART_SCHEDULED SD_ID128_MAKE(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
+#define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR                   \
+                                          SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
+
 #define SD_MESSAGE_SPAWN_FAILED           SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
 #define SD_MESSAGE_SPAWN_FAILED_STR       SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)