git.ipfire.org Git - thirdparty/systemd.git/commitdiff
manager: Fix HW watchdog when systemd starts before driver loaded
author: Michael Marley <michael@michaelmarley.com>
Tue, 8 Dec 2020 02:27:38 +0000 (21:27 -0500)
committer: Luca Boccassi <luca.boccassi@gmail.com>
Wed, 9 Dec 2020 11:47:22 +0000 (11:47 +0000)
When manager_{set|override}_watchdog is called, set the watchdog timeout
regardless of whether the hardware watchdog was successfully initialized.  If
the watchdog was requested but could not be initialized, then instead of
pinging it, attempt to initialize it again.  This ensures that the hardware
watchdog is initialized even if the kernel module for it isn't loaded when
systemd starts (which is quite likely, unless it is compiled in).

This builds on work by @danc86 in https://github.com/systemd/systemd/pull/17460,
but fixes the issue of not updating the watchdog timeout with the actual value
from the hardware.

Fixes https://github.com/systemd/systemd/issues/17838

Co-authored-by: Dan Callaghan <djc@djc.id.au>
Co-authored-by: Michael Marley <michael@michaelmarley.com>
src/core/manager.c
src/core/manager.h

index 1f1450b97c171b8dc31e1aca13d5cce0d6aef116..4b215a617665663f8a9eadc298b401c459341fd6 100644 (file)
@@ -2937,8 +2937,10 @@ int manager_loop(Manager *m) {
                 usec_t wait_usec, watchdog_usec;
 
                 watchdog_usec = manager_get_watchdog(m, WATCHDOG_RUNTIME);
-                if (timestamp_is_set(watchdog_usec))
+                if (m->runtime_watchdog_running)
                         (void) watchdog_ping();
+                else if (timestamp_is_set(watchdog_usec))
+                        manager_retry_runtime_watchdog(m);
 
                 if (!ratelimit_below(&rl)) {
                         /* Yay, something is going seriously wrong, pause a little */
@@ -3408,14 +3410,18 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
 
         if (t == WATCHDOG_RUNTIME)
                 if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME])) {
-                        if (timestamp_is_set(timeout))
+                        if (timestamp_is_set(timeout)) {
                                 r = watchdog_set_timeout(&timeout);
-                        else
+
+                                if (r >= 0)
+                                        m->runtime_watchdog_running = true;
+                        } else {
                                 watchdog_close(true);
+                                m->runtime_watchdog_running = false;
+                        }
                 }
 
-        if (r >= 0)
-                m->watchdog[t] = timeout;
+        m->watchdog[t] = timeout;
 }
 
 int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
@@ -3433,18 +3439,36 @@ int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
                 usec_t *p;
 
                 p = timestamp_is_set(timeout) ? &timeout : &m->watchdog[t];
-                if (timestamp_is_set(*p))
+                if (timestamp_is_set(*p)) {
                         r = watchdog_set_timeout(p);
-                else
+
+                        if (r >= 0)
+                                m->runtime_watchdog_running = true;
+                } else {
                         watchdog_close(true);
+                        m->runtime_watchdog_running = false;
+                }
         }
 
-        if (r >= 0)
-                m->watchdog_overridden[t] = timeout;
+        m->watchdog_overridden[t] = timeout;
 
         return 0;
 }
 
+void manager_retry_runtime_watchdog(Manager *m) {
+        int r = 0;
+
+        assert(m);
+
+        if (timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME]))
+                r = watchdog_set_timeout(&m->watchdog_overridden[WATCHDOG_RUNTIME]);
+        else
+                r = watchdog_set_timeout(&m->watchdog[WATCHDOG_RUNTIME]);
+
+        if (r >= 0)
+                m->runtime_watchdog_running = true;
+}
+
 static void manager_deserialize_uid_refs_one_internal(
                 Manager *m,
                 Hashmap** uid_refs,
index d22c801da8d818502921e502e3d112515540b4c8..19df889dd89a379df271fb95a4fb4560e7a3a01e 100644 (file)
@@ -241,6 +241,8 @@ struct Manager {
         usec_t watchdog[_WATCHDOG_TYPE_MAX];
         usec_t watchdog_overridden[_WATCHDOG_TYPE_MAX];
 
+        bool runtime_watchdog_running; /* Whether the runtime HW watchdog was started, so we know if we still need to get the real timeout from the hardware */
+
         dual_timestamp timestamps[_MANAGER_TIMESTAMP_MAX];
 
         /* Data specific to the device subsystem */
@@ -562,6 +564,7 @@ ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s);
 usec_t manager_get_watchdog(Manager *m, WatchdogType t);
 void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout);
 int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout);
+void manager_retry_runtime_watchdog(Manager *m);
 
 const char* oom_policy_to_string(OOMPolicy i) _const_;
 OOMPolicy oom_policy_from_string(const char *s) _pure_;