core: store timestamps of unit load attempts

author Luca Boccassi <luca.boccassi@microsoft.com>

Tue, 16 Jun 2020 17:46:55 +0000 (18:46 +0100)

committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>

Tue, 30 Jun 2020 14:50:00 +0000 (16:50 +0200)
author Luca Boccassi <luca.boccassi@microsoft.com>
Tue, 16 Jun 2020 17:46:55 +0000 (18:46 +0100)
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 30 Jun 2020 14:50:00 +0000 (16:50 +0200)
diff --git a/src/core/manager.c b/src/core/manager.c

index ab3d0b1192c1b39ca33f9b021e7bf3710bb50836..7b199f5175df7de8c7aa9c039d5e727eab70bdae 100644 (file)
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -1932,10 +1932,11 @@ unsigned manager_dispatch_load_queue(Manager *m) {
          return n;
  }
  
-static bool manager_unit_cache_needs_refresh(Manager *m) {
+static bool manager_unit_cache_needs_refresh(Manager *m, Unit *u) {
          assert(m);
  
-        return m->unit_cache_mtime > 0 && !lookup_paths_mtime_good(&m->lookup_paths, m->unit_cache_mtime);
+        return m->unit_cache_mtime > 0 &&
+                (m->unit_cache_mtime > u->fragment_loadtime || !lookup_paths_mtime_good(&m->lookup_paths, m->unit_cache_mtime));
  }
  
  int manager_load_unit_prepare(
@@ -1982,8 +1983,12 @@ int manager_load_unit_prepare(
                   * but if they are already referenced (because of dependencies or ordering)
                   * then we have to force a load of the fragment. As an optimization, check
                   * first if anything in the usual paths was modified since the last time
-                 * the cache was loaded. */
-                if (ret->load_state == UNIT_NOT_FOUND && manager_unit_cache_needs_refresh(m))
+                 * the cache was loaded. Also check whether the most recent attempt to load the
+                 * unit was made before the latest cache refresh, so that we know we need to
+                 * try again: even if the cache is current, it might have been updated in a
+                 * different context before we had a chance to retry loading this particular
+                 * unit. */
+                if (ret->load_state == UNIT_NOT_FOUND && manager_unit_cache_needs_refresh(m, ret))
                          ret->load_state = UNIT_STUB;
                  else {
                          *_ret = ret;
diff --git a/src/core/unit.c b/src/core/unit.c

index 2e4482da193b5c130a38af07353e706d7356e57e..18bf0cd52ab0df9bc6974fbb30b61f90d7cec69d 100644 (file)
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -1682,6 +1682,11 @@ fail:
                                                       UNIT_ERROR;
          u->load_error = r;
  
+        /* Record the last time we tried to load the unit, so that if the cache gets updated between now
+         * and the next time an attempt is made to load this unit, we know we need to check again */
+        if (u->load_state == UNIT_NOT_FOUND)
+                u->fragment_loadtime = now(CLOCK_REALTIME);
+
          unit_add_to_dbus_queue(u);
          unit_add_to_gc_queue(u);
  
diff --git a/src/core/unit.h b/src/core/unit.h

index 6a90daa7cedb45263a19ce3fa80a0a174f425000..d5e4c65989e400922724efb333714b91bfa06e67 100644 (file)
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -136,6 +136,7 @@ typedef struct Unit {
          char *source_path; /* if converted, the source file */
          char **dropin_paths;
  
+        usec_t fragment_loadtime;
          usec_t fragment_mtime;
          usec_t source_mtime;
          usec_t dropin_mtime;
diff --git a/test/TEST-48-START-STOP-NO-RELOAD/blacklist-ubuntu-ci b/test/TEST-48-START-STOP-NO-RELOAD/blacklist-ubuntu-ci

deleted file mode 100644 (file)

index e69de29..0000000
diff --git a/test/units/testsuite-48.sh b/test/units/testsuite-48.sh

index a811134d7746ad145b40ad2dd4cc493ed99bebd4..93e5e98e423ae0c24a1cafac05af673752387bf6 100755 (executable)
--- a/test/units/testsuite-48.sh
+++ b/test/units/testsuite-48.sh
@@ -21,18 +21,38 @@ systemctl start testservice-48.target
  # May 07 23:12:20 systemd-testsuite testsuite-48.sh[52]: -rw-r--r-- 1 root root 50 2020-05-07 23:12:20.000000000 +0100 /
  # May 07 23:12:20 systemd-testsuite testsuite-48.sh[30]: + stat -f --format=%t /etc/systemd/system/testservice-48.servic
  # May 07 23:12:20 systemd-testsuite testsuite-48.sh[53]: ef53
-sleep 1.1
+sleep 3.1
  
  cat > /run/systemd/system/testservice-48.service <<EOF
  [Service]
  ExecStart=/bin/sleep infinity
-Type=exec
  EOF
  
  systemctl start testservice-48.service
  
  systemctl is-active testservice-48.service
  
+# Stop and remove, and try again to exercise https://github.com/systemd/systemd/issues/15992
+systemctl stop testservice-48.service
+rm -f /run/systemd/system/testservice-48.service
+systemctl daemon-reload
+
+sleep 3.1
+
+cat > /run/systemd/system/testservice-48.service <<EOF
+[Service]
+ExecStart=/bin/sleep infinity
+EOF
+
+# Start a nonexistent unit first, so that the cache is reloaded for an unrelated
+# reason. Starting the existing unit later should still work thanks to the check
+# for the last load attempt vs cache timestamp.
+systemctl start testservice-48-nonexistent.service || true
+
+systemctl start testservice-48.service
+
+systemctl is-active testservice-48.service
+
  echo OK > /testok
  
  exit 0
author	Luca Boccassi <luca.boccassi@microsoft.com>
	Tue, 16 Jun 2020 17:46:55 +0000 (18:46 +0100)
committer	Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
	Tue, 30 Jun 2020 14:50:00 +0000 (16:50 +0200)
src/core/manager.c		patch \| blob \| blame \| history
src/core/unit.c		patch \| blob \| blame \| history
src/core/unit.h		patch \| blob \| blame \| history
test/TEST-48-START-STOP-NO-RELOAD/blacklist-ubuntu-ci	[deleted file]	patch \| blob \| blame \| history
test/units/testsuite-48.sh		patch \| blob \| blame \| history