const int *fds, size_t n_fds) {
size_t n_dont_close = 0;
- int dont_close[n_fds + 15];
+ int dont_close[n_fds + 16];
assert(params);
if (params->user_lookup_fd >= 0)
dont_close[n_dont_close++] = params->user_lookup_fd;
+ if (params->handoff_timestamp_fd >= 0)
+ dont_close[n_dont_close++] = params->handoff_timestamp_fd;
+
assert(n_dont_close <= ELEMENTSOF(dont_close));
return close_all_fds(dont_close, n_dont_close);
p->stderr_fd = safe_close(p->stderr_fd);
}
+static int exec_fd_mark_hot(
+ const ExecContext *c,
+ ExecParameters *p,
+ bool hot,
+ int *reterr_exit_status) {
+
+ assert(c);
+ assert(p);
+
+ if (p->exec_fd < 0)
+ return 0;
+
+ uint8_t x = hot;
+
+ if (write(p->exec_fd, &x, sizeof(x)) < 0) {
+ if (reterr_exit_status)
+ *reterr_exit_status = EXIT_EXEC;
+ return log_exec_error_errno(c, p, errno, "Failed to mark exec_fd as %s: %m", hot ? "hot" : "cold");
+ }
+
+ return 1;
+}
+
+static int send_handoff_timestamp(
+ const ExecContext *c,
+ ExecParameters *p,
+ int *reterr_exit_status) {
+
+ assert(c);
+ assert(p);
+
+ if (p->handoff_timestamp_fd < 0)
+ return 0;
+
+ dual_timestamp dt;
+ dual_timestamp_now(&dt);
+
+ if (send(p->handoff_timestamp_fd, (const usec_t[2]) { dt.realtime, dt.monotonic }, sizeof(usec_t) * 2, 0) < 0) {
+ if (reterr_exit_status)
+ *reterr_exit_status = EXIT_EXEC;
+ return log_exec_error_errno(c, p, errno, "Failed to send handoff timestamp: %m");
+ }
+
+ return 1;
+}
+
int exec_invoke(
const ExecCommand *command,
const ExecContext *context,
return log_exec_error_errno(context, params, r, "Failed to get OpenFile= file descriptors: %m");
}
- int keep_fds[n_fds + 3];
+ int keep_fds[n_fds + 4];
memcpy_safe(keep_fds, params->fds, n_fds * sizeof(int));
n_keep_fds = n_fds;
return log_exec_error_errno(context, params, r, "Failed to collect shifted fd: %m");
}
+ r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, ¶ms->handoff_timestamp_fd);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_exec_error_errno(context, params, r, "Failed to collect shifted fd: %m");
+ }
+
#if HAVE_LIBBPF
r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, ¶ms->bpf_restrict_fs_map_fd);
if (r < 0) {
/* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that
* we are more aggressive this time, since we don't need socket_fd and the netns and ipcns fds any
- * more. We do keep exec_fd however, if we have it, since we need to keep it open until the final
- * execve(). But first, close the remaining sockets in the context objects. */
+ * more. We do keep exec_fd and handoff_timestamp_fd however, if we have it, since we need to keep
+ * them open until the final execve(). But first, close the remaining sockets in the context
+ * objects. */
exec_runtime_close(runtime);
exec_params_close(params);
log_command_line(context, params, "Executing", executable, final_argv);
- if (params->exec_fd >= 0) {
- usec_t t = now(CLOCK_MONOTONIC);
+ /* We have finished with all our initializations. Let's now let the manager know that. From this
+ * point on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
- /* We have finished with all our initializations. Let's now let the manager know that. From this point
- * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. We send a
- * timestamp so that the service manager and users can track the precise moment we handed
- * over execution of the service to the kernel. */
+ r = exec_fd_mark_hot(context, params, /* hot= */ true, exit_status);
+ if (r < 0)
+ return r;
- if (write(params->exec_fd, &t, sizeof(t)) < 0) {
- *exit_status = EXIT_EXEC;
- return log_exec_error_errno(context, params, errno, "Failed to enable exec_fd: %m");
- }
+ /* As last thing before the execve(), let's send the handoff timestamp */
+ r = send_handoff_timestamp(context, params, exit_status);
+ if (r < 0) {
+ /* If this handoff timestamp failed, let's undo the marking as hot */
+ (void) exec_fd_mark_hot(context, params, /* hot= */ false, /* reterr_exit_status= */ NULL);
+ return r;
}
- r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
-
- if (params->exec_fd >= 0) {
- uint64_t hot = 0;
+ /* NB: we leave executable_fd, exec_fd, handoff_timestamp_fd open here. This is safe, because they
+ * have O_CLOEXEC set, and the execve() below will thus automatically close them. In fact, for
+ * exec_fd this is pretty much the whole raison d'etre. */
- /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
- * that POLLHUP on it no longer means execve() succeeded. */
+ r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
- if (write(params->exec_fd, &hot, sizeof(hot)) < 0) {
- *exit_status = EXIT_EXEC;
- return log_exec_error_errno(context, params, errno, "Failed to disable exec_fd: %m");
- }
- }
+ /* The execve() failed, let's undo the marking as hot */
+ (void) exec_fd_mark_hot(context, params, /* hot= */ false, /* reterr_exit_status= */ NULL);
*exit_status = EXIT_EXEC;
return log_exec_error_errno(context, params, r, "Failed to execute %s: %m", executable);
log_unit_debug(UNIT(s), "Passing %zu fds to service", exec_params.n_socket_fds + exec_params.n_storage_fds);
}
- if (!FLAGS_SET(exec_params.flags, EXEC_IS_CONTROL) &&
- IN_SET(s->type, SERVICE_NOTIFY, SERVICE_NOTIFY_RELOAD, SERVICE_EXEC, SERVICE_DBUS)) {
+ if (!FLAGS_SET(exec_params.flags, EXEC_IS_CONTROL) && s->type == SERVICE_EXEC) {
r = service_allocate_exec_fd(s, &exec_fd_source, &exec_params.exec_fd);
if (r < 0)
return r;
log_unit_debug(UNIT(s), "got exec-fd event");
/* If Type=exec is set, we'll consider a service started successfully the instant we invoked execve()
- * successfully for it. We implement this through a pipe() towards the child, which the kernel automatically
- * closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on the pipe in the
- * parent. We need to be careful however, as there are other reasons that we might cause the child's side of
- * the pipe to be closed (for example, a simple exit()). To deal with that we'll ignore EOFs on the pipe unless
- * the child signalled us first that it is about to call the execve(). It does so by sending us a simple
- * non-zero byte via the pipe. We also provide the child with a way to inform us in case execve() failed: if it
- * sends a zero byte we'll ignore POLLHUP on the fd again.
- * For exec, dbus, notify and notify-reload types we also get a timestamp from sd-executor, taken immediately
- * before the target executable is execve'd, so that we can accurately track when control is handed over to the
- * payload. */
+ * successfully for it. We implement this through a pipe() towards the child, which the kernel
+ * automatically closes for us due to O_CLOEXEC on execve() in the child, which then triggers EOF on
+ * the pipe in the parent. We need to be careful however, as there are other reasons that we might
+ * cause the child's side of the pipe to be closed (for example, a simple exit()). To deal with that
+ * we'll ignore EOFs on the pipe unless the child signalled us first that it is about to call the
+ * execve(). It does so by sending us a simple non-zero byte via the pipe. We also provide the child
+ * with a way to inform us in case execve() failed: if it sends a zero byte we'll ignore POLLHUP on
+ * the fd again. */
for (;;) {
- uint64_t x = 0;
+ uint8_t x;
ssize_t n;
- n = loop_read(fd, &x, sizeof(x), /* do_poll= */ false);
- if (n == -EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */
- return 0;
- if (n < 0)
- return log_unit_error_errno(UNIT(s), n, "Failed to read from exec_fd: %m");
- if (n == 0) {
- /* EOF → the event we are waiting for in case of Type=exec */
+ n = read(fd, &x, sizeof(x));
+ if (n < 0) {
+ if (errno == EAGAIN) /* O_NONBLOCK in effect → everything queued has now been processed. */
+ return 0;
+
+ return log_unit_error_errno(UNIT(s), errno, "Failed to read from exec_fd: %m");
+ }
+ if (n == 0) { /* EOF → the event we are waiting for in case of Type=exec */
s->exec_fd_event_source = sd_event_source_disable_unref(s->exec_fd_event_source);
if (s->exec_fd_hot) { /* Did the child tell us to expect EOF now? */
return 0;
}
- if (n == 1) {
- /* Backward compatibility block: a single byte was read, which means somehow an old
- * executor before this logic was introduced sent the notification, so there is no
- * timestamp (reset it). */
-
- s->exec_fd_hot = x;
- if (s->state == SERVICE_START)
- s->main_exec_status.handover_timestamp = DUAL_TIMESTAMP_NULL;
-
- continue;
- }
- if (x == 0) {
- /* If we get x=0 then the execve actually failed, which means the exec fd logic is
- * now off. Also reset the exec timestamp, given it has no meaning anymore. */
+ /* A byte was read → this turns on/off the exec fd logic */
+ assert(n == sizeof(x));
- s->exec_fd_hot = false;
- if (s->state == SERVICE_START)
- s->main_exec_status.handover_timestamp = DUAL_TIMESTAMP_NULL;
- } else {
- /* A non-zero value was read, which means the exec fd logic is now enabled. Record
- * the received timestamp so that users can track when control is handed over to the
- * service payload. */
-
- s->exec_fd_hot = true;
- if (s->state == SERVICE_START) {
- assert_cc(sizeof(uint64_t) == sizeof(usec_t));
- dual_timestamp_from_monotonic(&s->main_exec_status.handover_timestamp, (usec_t)x);
- }
- }
+ s->exec_fd_hot = x;
}
}