readonly t ExecMainStartTimestampMonotonic = ...;
readonly t ExecMainExitTimestamp = ...;
readonly t ExecMainExitTimestampMonotonic = ...;
+ readonly t ExecMainHandoverTimestamp = ...;
+ readonly t ExecMainHandoverTimestampMonotonic = ...;
readonly u ExecMainPID = ...;
readonly i ExecMainCode = ...;
readonly i ExecMainStatus = ...;
<variablelist class="dbus-property" generated="True" extra-ref="ExecMainExitTimestampMonotonic"/>
+ <variablelist class="dbus-property" generated="True" extra-ref="ExecMainHandoverTimestamp"/>
+
+ <variablelist class="dbus-property" generated="True" extra-ref="ExecMainHandoverTimestampMonotonic"/>
+
<variablelist class="dbus-property" generated="True" extra-ref="ExecMainPID"/>
<variablelist class="dbus-property" generated="True" extra-ref="ExecMainCode"/>
<para><varname>ExecMainStartTimestamp</varname>, <varname>ExecMainStartTimestampMonotonic</varname>,
<varname>ExecMainExitTimestamp</varname>, <varname>ExecMainExitTimestampMonotonic</varname>,
+ <varname>ExecMainHandoverTimestamp</varname>, <varname>ExecMainHandoverTimestampMonotonic</varname>,
<varname>ExecMainPID</varname>, <varname>ExecMainCode</varname>, <varname>ExecMainStatus</varname>
- contain information about the main process of the service as far as it is known. This is often the same
- runtime information that is stored in <varname>ExecStart</varname>. However, it deviates for
- <varname>Type=forking</varname> services where the main process of the service is not forked off
- systemd directly. These fields either contain information of the last run of the process or of the
- current running process.</para>
+ contain information about the main process of the service as far as it is known. The
+ <varname>ExecMainStartTimestamp</varname> timestamps record when the main child process is spawned by
+ the service manager. <varname>ExecMainExitTimestamp</varname> timestamps record when the main child
+ process exit has been detected by the service manager. <varname>ExecMainHandoverTimestamp</varname>
+ timestamps record when the service executable is executed by <command>systemd-executor</command>.
+ This is often the same runtime information that is stored in <varname>ExecStart=</varname>. However,
+ it deviates for <varname>Type=forking</varname> services where the main process of the service is not
+ forked off systemd directly. These fields either contain information of the last run of the process or
+ of the current running process.</para>
<para><varname>MainPID</varname> and <varname>ControlPID</varname> contain the main and control PID of
the service. The main PID is the current main PID of the service and is 0 when the service currently
<varname>MemoryZSwapCurrent</varname> were added in version 255.</para>
<para><varname>EffectiveMemoryHigh</varname>,
<varname>EffectiveMemoryMax</varname>,
- <varname>EffectiveTasksMax</varname>, and
- <varname>MemoryZSwapWriteback</varname> were added in version 256.</para>
+ <varname>EffectiveTasksMax</varname>,
+ <varname>MemoryZSwapWriteback</varname>,
+ <varname>ExecMainHandoverTimestampMonotonic</varname>, and
+ <varname>ExecMainHandoverTimestamp</varname> were added in version 256.</para>
</refsect2>
<refsect2>
<title>Socket Unit Objects</title>
#define BUS_EXEC_STATUS_VTABLE(prefix, offset, flags) \
BUS_PROPERTY_DUAL_TIMESTAMP(prefix "StartTimestamp", (offset) + offsetof(ExecStatus, start_timestamp), flags), \
BUS_PROPERTY_DUAL_TIMESTAMP(prefix "ExitTimestamp", (offset) + offsetof(ExecStatus, exit_timestamp), flags), \
+ BUS_PROPERTY_DUAL_TIMESTAMP(prefix "HandoverTimestamp", (offset) + offsetof(ExecStatus, handover_timestamp), flags), \
SD_BUS_PROPERTY(prefix "PID", "u", bus_property_get_pid, (offset) + offsetof(ExecStatus, pid), flags), \
SD_BUS_PROPERTY(prefix "Code", "i", bus_property_get_int, (offset) + offsetof(ExecStatus, code), flags), \
SD_BUS_PROPERTY(prefix "Status", "i", bus_property_get_int, (offset) + offsetof(ExecStatus, status), flags)
log_command_line(context, params, "Executing", executable, final_argv);
if (params->exec_fd >= 0) {
- uint8_t hot = 1;
+ usec_t t = now(CLOCK_MONOTONIC);
/* We have finished with all our initializations. Let's now let the manager know that. From this point
- * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
+ * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. We send a
+ * timestamp so that the service manager and users can track the precise moment we handed
+ * over execution of the service to the kernel. */
- if (write(params->exec_fd, &hot, sizeof(hot)) < 0) {
+ if (write(params->exec_fd, &t, sizeof(t)) < 0) {
*exit_status = EXIT_EXEC;
return log_exec_error_errno(context, params, errno, "Failed to enable exec_fd: %m");
}
r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
if (params->exec_fd >= 0) {
- uint8_t hot = 0;
+ uint64_t hot = 0;
/* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
* that POLLHUP on it no longer means execve() succeeded. */
"%sStart Timestamp: %s\n",
prefix, FORMAT_TIMESTAMP(s->start_timestamp.realtime));
+ if (dual_timestamp_is_set(&s->handover_timestamp))
+ fprintf(f,
+ "%sHandover Timestamp: %s\n",
+ prefix, FORMAT_TIMESTAMP(s->handover_timestamp.realtime));
+
if (dual_timestamp_is_set(&s->exit_timestamp))
fprintf(f,
"%sExit Timestamp: %s\n"
struct ExecStatus {
dual_timestamp start_timestamp;
dual_timestamp exit_timestamp;
+ dual_timestamp handover_timestamp;
pid_t pid;
int code; /* as in siginfo_t::si_code */
int status; /* as in siginfo_t::si_status */
int stdout_fd;
int stderr_fd;
- /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done */
+ /* An fd that is closed by the execve(), and thus will result in EOF when the execve() is done. It
+ * will also be used to send a timestamp taken as the very last operation before execve, for
+ * tracking purposes. */
int exec_fd;
char *notify_socket;
#include "fd-util.h"
#include "fileio.h"
#include "format-util.h"
+#include "io-util.h"
#include "load-dropin.h"
#include "load-fragment.h"
#include "log.h"
log_unit_debug(UNIT(s), "Passing %zu fds to service", exec_params.n_socket_fds + exec_params.n_storage_fds);
}
- if (!FLAGS_SET(exec_params.flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) {
+ if (!FLAGS_SET(exec_params.flags, EXEC_IS_CONTROL) &&
+ IN_SET(s->type, SERVICE_NOTIFY, SERVICE_NOTIFY_RELOAD, SERVICE_EXEC, SERVICE_DBUS)) {
r = service_allocate_exec_fd(s, &exec_fd_source, &exec_params.exec_fd);
if (r < 0)
return r;
(void) serialize_item_format(f, "main-exec-status-pid", PID_FMT, s->main_exec_status.pid);
(void) serialize_dual_timestamp(f, "main-exec-status-start", &s->main_exec_status.start_timestamp);
(void) serialize_dual_timestamp(f, "main-exec-status-exit", &s->main_exec_status.exit_timestamp);
+ (void) serialize_dual_timestamp(f, "main-exec-status-handover", &s->main_exec_status.handover_timestamp);
if (dual_timestamp_is_set(&s->main_exec_status.exit_timestamp)) {
(void) serialize_item_format(f, "main-exec-status-code", "%i", s->main_exec_status.code);
deserialize_dual_timestamp(value, &s->main_exec_status.start_timestamp);
else if (streq(key, "main-exec-status-exit"))
deserialize_dual_timestamp(value, &s->main_exec_status.exit_timestamp);
+ else if (streq(key, "main-exec-status-handover"))
+ deserialize_dual_timestamp(value, &s->main_exec_status.handover_timestamp);
else if (streq(key, "notify-access-override")) {
NotifyAccess notify_access;
* the pipe to be closed (for example, a simple exit()). To deal with that we'll ignore EOFs on the pipe unless
* the child signalled us first that it is about to call the execve(). It does so by sending us a simple
* non-zero byte via the pipe. We also provide the child with a way to inform us in case execve() failed: if it
- * sends a zero byte we'll ignore POLLHUP on the fd again. */
+ * sends a zero byte we'll ignore POLLHUP on the fd again.
+ * For exec, dbus, notify and notify-reload types we also get a timestamp from sd-executor, taken immediately
+ * before the target executable is execve'd, so that we can accurately track when control is handed over to the
+ * payload. */
for (;;) {
- uint8_t x;
+ uint64_t x = 0;
ssize_t n;
- n = read(fd, &x, sizeof(x));
- if (n < 0) {
- if (errno == EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */
- return 0;
-
- return log_unit_error_errno(UNIT(s), errno, "Failed to read from exec_fd: %m");
- }
- if (n == 0) { /* EOF → the event we are waiting for */
-
+ n = loop_read(fd, &x, sizeof(x), /* do_poll= */ false);
+ if (n == -EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */
+ return 0;
+ if (n < 0)
+ return log_unit_error_errno(UNIT(s), n, "Failed to read from exec_fd: %m");
+ if (n == 0) {
+ /* EOF → the event we are waiting for in case of Type=exec */
s->exec_fd_event_source = sd_event_source_disable_unref(s->exec_fd_event_source);
if (s->exec_fd_hot) { /* Did the child tell us to expect EOF now? */
return 0;
}
+ if (n == 1) {
+ /* Backward compatibility block: a single byte was read, which means somehow an old
+ * executor before this logic was introduced sent the notification, so there is no
+ * timestamp (reset it). */
- /* A byte was read → this turns on/off the exec fd logic */
- assert(n == sizeof(x));
- s->exec_fd_hot = x;
- }
+ s->exec_fd_hot = x;
+ if (s->state == SERVICE_START)
+ s->main_exec_status.handover_timestamp = DUAL_TIMESTAMP_NULL;
- return 0;
+ continue;
+ }
+
+ if (x == 0) {
+ /* If we get x=0 then the execve actually failed, which means the exec fd logic is
+ * now off. Also reset the exec timestamp, given it has no meaning anymore. */
+
+ s->exec_fd_hot = false;
+ if (s->state == SERVICE_START)
+ s->main_exec_status.handover_timestamp = DUAL_TIMESTAMP_NULL;
+ } else {
+ /* A non-zero value was read, which means the exec fd logic is now enabled. Record
+ * the received timestamp so that users can track when control is handed over to the
+ * service payload. */
+
+ s->exec_fd_hot = true;
+ if (s->state == SERVICE_START) {
+ assert_cc(sizeof(uint64_t) == sizeof(usec_t));
+ dual_timestamp_from_monotonic(&s->main_exec_status.handover_timestamp, (usec_t)x);
+ }
+ }
+ }
}
static void service_notify_cgroup_empty_event(Unit *u) {
--- /dev/null
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eux
+set -o pipefail
+
+# Check that timestamps of a Type=notify service are consistent
+
+systemd-run --service-type notify --property NotifyAccess=all --unit notify.service --wait sh -c 'systemd-notify --ready; exit 1' || :
+
+start=$(systemctl show --property=ExecMainStartTimestampMonotonic --value notify.service)
+handover=$(systemctl show --property=ExecMainHandoverTimestampMonotonic --value notify.service)
+active=$(systemctl show --property=ActiveEnterTimestampMonotonic --value notify.service)
+exit=$(systemctl show --property=ExecMainExitTimestampMonotonic --value notify.service)
+
+[[ $start -le $handover ]]
+[[ $handover -le $active ]]
+[[ $active -le $exit ]]