From: Lennart Poettering Date: Tue, 23 Apr 2024 21:17:49 +0000 (+0200) Subject: manager: add socket for receiving handoff timestamps from forked children X-Git-Tag: v256-rc1~14^2~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=817062e6211e7559ff91bcb7bb5dc4e472fc1646;p=thirdparty%2Fsystemd.git manager: add socket for receiving handoff timestamps from forked children This adds an AF_UNIX socket pair to the manager that we can collect handoff timestamp messages on. The idea is that forked off children send a datagram with a timestamp and we use its sender PID to match it against the right forked off process. This part only implements the receiving side: a socket is created, and listened on. Received datagrams are parsed, verified and then dispatched to the interested units. --- diff --git a/src/core/manager-serialize.c b/src/core/manager-serialize.c index 0b3def3dabe..39a8a0533f0 100644 --- a/src/core/manager-serialize.c +++ b/src/core/manager-serialize.c @@ -148,6 +148,12 @@ int manager_serialize( return r; } + if (m->handoff_timestamp_fds[0] >= 0) { + r = serialize_fd_many(f, fds, "handoff-timestamp-fds", m->handoff_timestamp_fds, 2); + if (r < 0) + return r; + } + (void) serialize_ratelimit(f, "dump-ratelimit", &m->dump_ratelimit); (void) serialize_ratelimit(f, "reload-reexec-ratelimit", &m->reload_reexec_ratelimit); diff --git a/src/core/manager.c b/src/core/manager.c index 89c80821b94..04103e0fe93 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -56,6 +56,7 @@ #include "inotify-util.h" #include "install.h" #include "io-util.h" +#include "iovec-util.h" #include "label-util.h" #include "load-fragment.h" #include "locale-setup.h" @@ -124,6 +125,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); +static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata); static int manager_dispatch_run_queue(sd_event_source *source, void *userdata); static int manager_dispatch_sigchld(sd_event_source *source, void *userdata); @@ -904,6 +906,7 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags, .cgroups_agent_fd = -EBADF, .signal_fd = -EBADF, .user_lookup_fds = EBADF_PAIR, + .handoff_timestamp_fds = EBADF_PAIR, .private_listen_fd = -EBADF, .dev_autofs_fd = -EBADF, .cgroup_inotify_fd = -EBADF, @@ -1253,6 +1256,49 @@ static int manager_setup_user_lookup_fd(Manager *m) { return 0; } +static int manager_setup_handoff_timestamp_fd(Manager *m) { + int r; + + assert(m); + + /* Set up the socket pair used for for passing timestamps back when the executor processes we fork + * off invokes execve(), i.e. when we hand off control to our payload processes. */ + + if (m->handoff_timestamp_fds[0] < 0) { + m->handoff_timestamp_event_source = sd_event_source_disable_unref(m->handoff_timestamp_event_source); + safe_close_pair(m->handoff_timestamp_fds); + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->handoff_timestamp_fds) < 0) + return log_error_errno(errno, "Failed to allocate handoff timestamp socket: %m"); + + /* Make sure children never have to block */ + (void) fd_increase_rxbuf(m->handoff_timestamp_fds[0], NOTIFY_RCVBUF_SIZE); + + r = setsockopt_int(m->handoff_timestamp_fds[0], SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return log_error_errno(r, "SO_PASSCRED failed: %m"); + + /* Mark the receiving socket as O_NONBLOCK (but leave sending side as-is) */ + r = fd_nonblock(m->handoff_timestamp_fds[0], true); + if (r < 0) + return log_error_errno(r, "Failed to make handoff timestamp socket O_NONBLOCK: %m"); + } + + if (!m->handoff_timestamp_event_source) { + r = sd_event_add_io(m->event, &m->handoff_timestamp_event_source, m->handoff_timestamp_fds[0], EPOLLIN, manager_dispatch_handoff_timestamp_fd, m); + if (r < 0) + return log_error_errno(errno, "Failed to allocate handoff timestamp event source: %m"); + + r = sd_event_source_set_priority(m->handoff_timestamp_event_source, EVENT_PRIORITY_HANDOFF_TIMESTAMP); + if (r < 0) + return log_error_errno(errno, "Failed to set priority of handoff timestamp event source: %m"); + + (void) sd_event_source_set_description(m->handoff_timestamp_event_source, "handoff-timestamp"); + } + + return 0; +} + static unsigned manager_dispatch_cleanup_queue(Manager *m) { Unit *u; unsigned n = 0; @@ -1667,12 +1713,14 @@ Manager* manager_free(Manager *m) { sd_event_source_unref(m->jobs_in_progress_event_source); sd_event_source_unref(m->run_queue_event_source); sd_event_source_unref(m->user_lookup_event_source); + sd_event_source_unref(m->handoff_timestamp_event_source); sd_event_source_unref(m->memory_pressure_event_source); safe_close(m->signal_fd); safe_close(m->notify_fd); safe_close(m->cgroups_agent_fd); safe_close_pair(m->user_lookup_fds); + safe_close_pair(m->handoff_timestamp_fds); manager_close_ask_password(m); @@ -2013,6 +2061,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds, const char *roo /* This shouldn't fail, except if things are really broken. */ return r; + r = manager_setup_handoff_timestamp_fd(m); + if (r < 0) + /* This shouldn't fail, except if things are really broken. */ + return r; + /* Connect to the bus if we are good for it */ manager_setup_bus(m); @@ -3655,6 +3708,7 @@ int manager_reload(Manager *m) { (void) manager_setup_notify(m); (void) manager_setup_cgroups_agent(m); (void) manager_setup_user_lookup_fd(m); + (void) manager_setup_handoff_timestamp_fd(m); /* Third, fire things up! */ manager_coldplug(m); @@ -4830,6 +4884,73 @@ static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint return 0; } +static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { + Manager *m = ASSERT_PTR(userdata); + usec_t ts[2] = {}; + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control; + struct msghdr msghdr = { + .msg_iov = &IOVEC_MAKE(ts, sizeof(ts)), + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + ssize_t n; + + assert(source); + + n = recvmsg_safe(m->handoff_timestamp_fds[0], &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC); + if (ERRNO_IS_NEG_TRANSIENT(n)) + return 0; /* Spurious wakeup, try again */ + if (n == -EXFULL) { + log_warning("Got message with truncated control, ignoring."); + return 0; + } + if (n < 0) + return log_error_errno(n, "Failed to receive handoff timestamp message: %m"); + + if (msghdr.msg_flags & MSG_TRUNC) { + log_warning("Got truncated handoff timestamp message, ignoring."); + return 0; + } + if (n != sizeof(ts)) { + log_warning("Got handoff timestamp message of unexpected size %zi (expected %zu), ignoring.", n, sizeof(ts)); + return 0; + } + + struct ucred *ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred); + if (!ucred || !pid_is_valid(ucred->pid)) { + log_warning("Received notify message without valid credentials. Ignoring."); + return 0; + } + + log_debug("Got handoff timestamp event for PID " PID_FMT ".", ucred->pid); + + _cleanup_free_ Unit **units = NULL; + int n_units = manager_get_units_for_pidref(m, &PIDREF_MAKE_FROM_PID(ucred->pid), &units); + if (n_units < 0) { + log_warning_errno(n_units, "Unable to determine units for PID " PID_FMT ", ignoring: %m", ucred->pid); + return 0; + } + if (n_units == 0) { + log_debug("Got handoff timestamp for process " PID_FMT " we are not interested in, ignoring.", ucred->pid); + return 0; + } + + dual_timestamp dt = { + .realtime = ts[0], + .monotonic = ts[1], + }; + + FOREACH_ARRAY(u, units, n_units) { + if (!UNIT_VTABLE(*u)->notify_handoff_timestamp) + continue; + + UNIT_VTABLE(*u)->notify_handoff_timestamp(*u, ucred, &dt); + } + + return 0; +} + void manager_ref_console(Manager *m) { assert(m); diff --git a/src/core/manager.h b/src/core/manager.h index f0f814f940c..0641b2726f0 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -286,6 +286,9 @@ struct Manager { int user_lookup_fds[2]; sd_event_source *user_lookup_event_source; + int handoff_timestamp_fds[2]; + sd_event_source *handoff_timestamp_event_source; + RuntimeScope runtime_scope; LookupPaths lookup_paths; @@ -656,22 +659,23 @@ void unit_defaults_done(UnitDefaults *defaults); enum { /* most important … */ - EVENT_PRIORITY_USER_LOOKUP = SD_EVENT_PRIORITY_NORMAL-10, - EVENT_PRIORITY_MOUNT_TABLE = SD_EVENT_PRIORITY_NORMAL-9, - EVENT_PRIORITY_SWAP_TABLE = SD_EVENT_PRIORITY_NORMAL-9, - EVENT_PRIORITY_CGROUP_AGENT = SD_EVENT_PRIORITY_NORMAL-8, /* cgroupv1 */ - EVENT_PRIORITY_CGROUP_INOTIFY = SD_EVENT_PRIORITY_NORMAL-8, /* cgroupv2 */ - EVENT_PRIORITY_CGROUP_OOM = SD_EVENT_PRIORITY_NORMAL-7, - EVENT_PRIORITY_EXEC_FD = SD_EVENT_PRIORITY_NORMAL-6, - EVENT_PRIORITY_NOTIFY = SD_EVENT_PRIORITY_NORMAL-5, - EVENT_PRIORITY_SIGCHLD = SD_EVENT_PRIORITY_NORMAL-4, - EVENT_PRIORITY_SIGNALS = SD_EVENT_PRIORITY_NORMAL-3, - EVENT_PRIORITY_CGROUP_EMPTY = SD_EVENT_PRIORITY_NORMAL-2, - EVENT_PRIORITY_TIME_CHANGE = SD_EVENT_PRIORITY_NORMAL-1, - EVENT_PRIORITY_TIME_ZONE = SD_EVENT_PRIORITY_NORMAL-1, - EVENT_PRIORITY_IPC = SD_EVENT_PRIORITY_NORMAL, - EVENT_PRIORITY_REWATCH_PIDS = SD_EVENT_PRIORITY_IDLE, - EVENT_PRIORITY_SERVICE_WATCHDOG = SD_EVENT_PRIORITY_IDLE+1, - EVENT_PRIORITY_RUN_QUEUE = SD_EVENT_PRIORITY_IDLE+2, + EVENT_PRIORITY_USER_LOOKUP = SD_EVENT_PRIORITY_NORMAL-11, + EVENT_PRIORITY_MOUNT_TABLE = SD_EVENT_PRIORITY_NORMAL-10, + EVENT_PRIORITY_SWAP_TABLE = SD_EVENT_PRIORITY_NORMAL-10, + EVENT_PRIORITY_CGROUP_AGENT = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv1 */ + EVENT_PRIORITY_CGROUP_INOTIFY = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv2 */ + EVENT_PRIORITY_CGROUP_OOM = SD_EVENT_PRIORITY_NORMAL-8, + EVENT_PRIORITY_HANDOFF_TIMESTAMP = SD_EVENT_PRIORITY_NORMAL-7, + EVENT_PRIORITY_EXEC_FD = SD_EVENT_PRIORITY_NORMAL-6, + EVENT_PRIORITY_NOTIFY = SD_EVENT_PRIORITY_NORMAL-5, + EVENT_PRIORITY_SIGCHLD = SD_EVENT_PRIORITY_NORMAL-4, + EVENT_PRIORITY_SIGNALS = SD_EVENT_PRIORITY_NORMAL-3, + EVENT_PRIORITY_CGROUP_EMPTY = SD_EVENT_PRIORITY_NORMAL-2, + EVENT_PRIORITY_TIME_CHANGE = SD_EVENT_PRIORITY_NORMAL-1, + EVENT_PRIORITY_TIME_ZONE = SD_EVENT_PRIORITY_NORMAL-1, + EVENT_PRIORITY_IPC = SD_EVENT_PRIORITY_NORMAL, + EVENT_PRIORITY_REWATCH_PIDS = SD_EVENT_PRIORITY_IDLE, + EVENT_PRIORITY_SERVICE_WATCHDOG = SD_EVENT_PRIORITY_IDLE+1, + EVENT_PRIORITY_RUN_QUEUE = SD_EVENT_PRIORITY_IDLE+2, /* … to least important */ }; diff --git a/src/core/unit.h b/src/core/unit.h index 74520a33cb3..b135fecc514 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -632,6 +632,9 @@ typedef struct UnitVTable { /* Called whenever a process of this unit sends us a message */ void (*notify_message)(Unit *u, const struct ucred *ucred, char * const *tags, FDSet *fds); + /* Called whenever we learn a handoff timestamp */ + void (*notify_handoff_timestamp)(Unit *u, const struct ucred *ucred, const dual_timestamp *ts); + /* Called whenever a name this Unit registered for comes or goes away. */ void (*bus_name_owner_change)(Unit *u, const char *new_owner);