/* SPDX-License-Identifier: LGPL-2.1+ */
-/***
- This file is part of systemd.
-
- Copyright 2013 Lennart Poettering
-***/
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include "sd-id128.h"
#include "alloc-util.h"
+#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
+#include "memory-util.h"
#include "missing.h"
#include "prioq.h"
#include "process-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"
-#include "util.h"
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
-typedef enum EventSourceType {
- SOURCE_IO,
- SOURCE_TIME_REALTIME,
- SOURCE_TIME_BOOTTIME,
- SOURCE_TIME_MONOTONIC,
- SOURCE_TIME_REALTIME_ALARM,
- SOURCE_TIME_BOOTTIME_ALARM,
- SOURCE_SIGNAL,
- SOURCE_CHILD,
- SOURCE_DEFER,
- SOURCE_POST,
- SOURCE_EXIT,
- SOURCE_WATCHDOG,
- SOURCE_INOTIFY,
- _SOURCE_EVENT_SOURCE_TYPE_MAX,
- _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
-} EventSourceType;
-
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
[SOURCE_IO] = "io",
[SOURCE_TIME_REALTIME] = "realtime",
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
-/* All objects we use in epoll events start with this value, so that
- * we know how to dispatch it */
-typedef enum WakeupType {
- WAKEUP_NONE,
- WAKEUP_EVENT_SOURCE,
- WAKEUP_CLOCK_DATA,
- WAKEUP_SIGNAL_DATA,
- WAKEUP_INOTIFY_DATA,
- _WAKEUP_TYPE_MAX,
- _WAKEUP_TYPE_INVALID = -1,
-} WakeupType;
-
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
-struct inode_data;
-
-struct sd_event_source {
- WakeupType wakeup;
-
- unsigned n_ref;
-
- sd_event *event;
- void *userdata;
- sd_event_handler_t prepare;
-
- char *description;
-
- EventSourceType type:5;
- int enabled:3;
- bool pending:1;
- bool dispatching:1;
- bool floating:1;
-
- int64_t priority;
- unsigned pending_index;
- unsigned prepare_index;
- uint64_t pending_iteration;
- uint64_t prepare_iteration;
-
- LIST_FIELDS(sd_event_source, sources);
-
- union {
- struct {
- sd_event_io_handler_t callback;
- int fd;
- uint32_t events;
- uint32_t revents;
- bool registered:1;
- bool owned:1;
- } io;
- struct {
- sd_event_time_handler_t callback;
- usec_t next, accuracy;
- unsigned earliest_index;
- unsigned latest_index;
- } time;
- struct {
- sd_event_signal_handler_t callback;
- struct signalfd_siginfo siginfo;
- int sig;
- } signal;
- struct {
- sd_event_child_handler_t callback;
- siginfo_t siginfo;
- pid_t pid;
- int options;
- } child;
- struct {
- sd_event_handler_t callback;
- } defer;
- struct {
- sd_event_handler_t callback;
- } post;
- struct {
- sd_event_handler_t callback;
- unsigned prioq_index;
- } exit;
- struct {
- sd_event_inotify_handler_t callback;
- uint32_t mask;
- struct inode_data *inode_data;
- LIST_FIELDS(sd_event_source, by_inode_data);
- } inotify;
- };
-};
-
-struct clock_data {
- WakeupType wakeup;
- int fd;
-
- /* For all clocks we maintain two priority queues each, one
- * ordered for the earliest times the events may be
- * dispatched, and one ordered by the latest times they must
- * have been dispatched. The range between the top entries in
- * the two prioqs is the time window we can freely schedule
- * wakeups in */
-
- Prioq *earliest;
- Prioq *latest;
- usec_t next;
-
- bool needs_rearm:1;
-};
-
-struct signal_data {
- WakeupType wakeup;
-
- /* For each priority we maintain one signal fd, so that we
- * only have to dequeue a single event per priority at a
- * time. */
-
- int fd;
- int64_t priority;
- sigset_t sigset;
- sd_event_source *current;
-};
-
-/* A structure listing all event sources currently watching a specific inode */
-struct inode_data {
- /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
- ino_t ino;
- dev_t dev;
-
- /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
- * rearrange the priority still until then, as we need the original inode to change the priority as we need to
- * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the
- * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
- * the sd-event object, so that it is efficient to close everything, before entering the next event loop
- * iteration. */
- int fd;
-
- /* The inotify "watch descriptor" */
- int wd;
-
- /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
- * most recently been set on the watch descriptor. */
- uint32_t combined_mask;
-
- /* All event sources subscribed to this inode */
- LIST_HEAD(sd_event_source, event_sources);
-
- /* The inotify object we watch this inode with */
- struct inotify_data *inotify_data;
-
- /* A linked list of all inode data objects with fds to close (see above) */
- LIST_FIELDS(struct inode_data, to_close);
-};
-
-/* A structure encapsulating an inotify fd */
-struct inotify_data {
- WakeupType wakeup;
-
- /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
- * a time */
-
- int fd;
- int64_t priority;
-
- Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
- Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
-
- /* The buffer we read inotify events into */
- union inotify_event_buffer buffer;
- size_t buffer_filled; /* fill level of the buffer */
-
- /* How many event sources are currently marked pending for this inotify. We won't read new events off the
- * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
- * the events locally if they can't be coalesced). */
- unsigned n_pending;
-
- /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
- * to make it efficient to figure out what inotify objects to process data on next. */
- LIST_FIELDS(struct inotify_data, buffered);
-};
-
struct sd_event {
unsigned n_ref;
static int pending_prioq_compare(const void *a, const void *b) {
const sd_event_source *x = a, *y = b;
+ int r;
assert(x->pending);
assert(y->pending);
return 1;
/* Lower priority values first */
- if (x->priority < y->priority)
- return -1;
- if (x->priority > y->priority)
- return 1;
+ r = CMP(x->priority, y->priority);
+ if (r != 0)
+ return r;
/* Older entries first */
- if (x->pending_iteration < y->pending_iteration)
- return -1;
- if (x->pending_iteration > y->pending_iteration)
- return 1;
-
- return 0;
+ return CMP(x->pending_iteration, y->pending_iteration);
}
static int prepare_prioq_compare(const void *a, const void *b) {
const sd_event_source *x = a, *y = b;
+ int r;
assert(x->prepare);
assert(y->prepare);
/* Move most recently prepared ones last, so that we can stop
* preparing as soon as we hit one that has already been
* prepared in the current iteration */
- if (x->prepare_iteration < y->prepare_iteration)
- return -1;
- if (x->prepare_iteration > y->prepare_iteration)
- return 1;
+ r = CMP(x->prepare_iteration, y->prepare_iteration);
+ if (r != 0)
+ return r;
/* Lower priority values first */
- if (x->priority < y->priority)
- return -1;
- if (x->priority > y->priority)
- return 1;
-
- return 0;
+ return CMP(x->priority, y->priority);
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
return 1;
/* Order by time */
- if (x->time.next < y->time.next)
- return -1;
- if (x->time.next > y->time.next)
- return 1;
-
- return 0;
+ return CMP(x->time.next, y->time.next);
}
static usec_t time_event_source_latest(const sd_event_source *s) {
return 1;
/* Order by time */
- if (time_event_source_latest(x) < time_event_source_latest(y))
- return -1;
- if (time_event_source_latest(x) > time_event_source_latest(y))
- return 1;
-
- return 0;
+ return CMP(time_event_source_latest(x), time_event_source_latest(y));
}
static int exit_prioq_compare(const void *a, const void *b) {
return 1;
/* Lower priority values first */
- if (x->priority < y->priority)
- return -1;
- if (x->priority > y->priority)
- return 1;
-
- return 0;
+ return CMP(x->priority, y->priority);
}
static void free_clock_data(struct clock_data *d) {
prioq_free(d->latest);
}
-static void event_free(sd_event *e) {
+static sd_event *event_free(sd_event *e) {
sd_event_source *s;
assert(e);
hashmap_free(e->child_sources);
set_free(e->post_sources);
- free(e);
+
+ return mfree(e);
}
_public_ int sd_event_new(sd_event** ret) {
return r;
}
-_public_ sd_event* sd_event_ref(sd_event *e) {
-
- if (!e)
- return NULL;
-
- assert(e->n_ref >= 1);
- e->n_ref++;
-
- return e;
-}
-
-_public_ sd_event* sd_event_unref(sd_event *e) {
-
- if (!e)
- return NULL;
-
- assert(e->n_ref >= 1);
- e->n_ref--;
-
- if (e->n_ref <= 0)
- event_free(e);
-
- return NULL;
-}
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
static bool event_pid_changed(sd_event *e) {
assert(e);
}
}
+/* Destroys a per-priority signal_data object: removes it from the event
+ * loop's signal_data hashmap (keyed by the priority field), closes its
+ * signalfd and frees the structure. Accepts d == NULL as a no-op, so that
+ * error paths may call it unconditionally. */
+static void event_free_signal_data(sd_event *e, struct signal_data *d) {
+ assert(e);
+
+ if (!d)
+ return;
+
+ hashmap_remove(e->signal_data, &d->priority);
+ safe_close(d->fd);
+ free(d);
+}
+
static int event_make_signal_data(
sd_event *e,
int sig,
return 0;
fail:
- if (added) {
- d->fd = safe_close(d->fd);
- hashmap_remove(e->signal_data, &d->priority);
- free(d);
- }
+ if (added)
+ event_free_signal_data(e, d);
return r;
}
assert_se(sigdelset(&d->sigset, sig) >= 0);
if (sigisemptyset(&d->sigset)) {
-
/* If all the mask is all-zero we can get rid of the structure */
- hashmap_remove(e->signal_data, &d->priority);
- safe_close(d->fd);
- free(d);
+ event_free_signal_data(e, d);
return;
}
* continued to being watched. That's because inotify doesn't really have an API for that: we
* can only change watch masks with access to the original inode either by fd or by path. But
* paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
- * continously and keeping the mount busy which we can't really do. We could reconstruct the
+ * continuously and keeping the mount busy which we can't really do. We could reconstruct the
* original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
* there), but given the need for open_by_handle_at() which is privileged and not universally
* available this would be quite an incomplete solution. Hence we go the other way, leave the
source_disconnect(s);
if (s->type == SOURCE_IO && s->io.owned)
- safe_close(s->io.fd);
+ s->io.fd = safe_close(s->io.fd);
+
+ if (s->destroy_callback)
+ s->destroy_callback(s->userdata);
free(s->description);
free(s);
}
+DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
static int source_set_pending(sd_event_source *s, bool b) {
int r;
sd_event_io_handler_t callback,
void *userdata) {
- sd_event_source *s;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
int r;
assert_return(e, -EINVAL);
s->enabled = SD_EVENT_ON;
r = source_io_register(s, s->enabled, events);
- if (r < 0) {
- source_free(s);
+ if (r < 0)
return r;
- }
if (ret)
*ret = s;
+ TAKE_PTR(s);
return 0;
}
sd_id128_t bootid = {};
/* When we sleep for longer, we try to realign the wakeup to
- the same time wihtin each minute/second/250ms, so that
+ the same time within each minute/second/250ms, so that
events all across the system can be coalesced into a single
CPU wakeup. However, let's take some system-specific
randomness for this value, so that in a network of systems
void *userdata) {
EventSourceType type;
- sd_event_source *s;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
struct clock_data *d;
int r;
r = prioq_put(d->earliest, s, &s->time.earliest_index);
if (r < 0)
- goto fail;
+ return r;
r = prioq_put(d->latest, s, &s->time.latest_index);
if (r < 0)
- goto fail;
+ return r;
if (ret)
*ret = s;
+ TAKE_PTR(s);
return 0;
-
-fail:
- source_free(s);
- return r;
}
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
sd_event_signal_handler_t callback,
void *userdata) {
- sd_event_source *s;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
struct signal_data *d;
sigset_t ss;
int r;
e->signal_sources[sig] = s;
r = event_make_signal_data(e, sig, &d);
- if (r < 0) {
- source_free(s);
+ if (r < 0)
return r;
- }
/* Use the signal name as description for the event source by default */
(void) sd_event_source_set_description(s, signal_to_string(sig));
if (ret)
*ret = s;
+ TAKE_PTR(s);
return 0;
}
sd_event_child_handler_t callback,
void *userdata) {
- sd_event_source *s;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
int r;
assert_return(e, -EINVAL);
s->enabled = SD_EVENT_ONESHOT;
r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
- if (r < 0) {
- source_free(s);
+ if (r < 0)
return r;
- }
e->n_enabled_child_sources++;
r = event_make_signal_data(e, SIGCHLD, NULL);
if (r < 0) {
e->n_enabled_child_sources--;
- source_free(s);
return r;
}
if (ret)
*ret = s;
+ TAKE_PTR(s);
return 0;
}
sd_event_handler_t callback,
void *userdata) {
- sd_event_source *s;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
int r;
assert_return(e, -EINVAL);
s->enabled = SD_EVENT_ONESHOT;
r = source_set_pending(s, true);
- if (r < 0) {
- source_free(s);
+ if (r < 0)
return r;
- }
if (ret)
*ret = s;
+ TAKE_PTR(s);
return 0;
}
sd_event_handler_t callback,
void *userdata) {
- sd_event_source *s;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
int r;
assert_return(e, -EINVAL);
s->enabled = SD_EVENT_ON;
r = set_put(e->post_sources, s);
- if (r < 0) {
- source_free(s);
+ if (r < 0)
return r;
- }
if (ret)
*ret = s;
+ TAKE_PTR(s);
return 0;
}
sd_event_handler_t callback,
void *userdata) {
- sd_event_source *s;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
int r;
assert_return(e, -EINVAL);
s->enabled = SD_EVENT_ONESHOT;
r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
- if (r < 0) {
- source_free(s);
+ if (r < 0)
return r;
- }
if (ret)
*ret = s;
+ TAKE_PTR(s);
return 0;
}
return 1;
}
+/* Ordering for inode_data objects in the per-inotify hashmap: compare by
+ * device number first, then by inode number — the (st_dev, st_ino) pair
+ * identifies the watched inode. */
-static int inode_data_compare(const void *a, const void *b) {
- const struct inode_data *x = a, *y = b;
+static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
+ int r;
assert(x);
assert(y);
- if (x->dev < y->dev)
- return -1;
- if (x->dev > y->dev)
- return 1;
-
- if (x->ino < y->ino)
- return -1;
- if (x->ino > y->ino)
- return 1;
+ r = CMP(x->dev, y->dev);
+ if (r != 0)
+ return r;
- return 0;
+ return CMP(x->ino, y->ino);
}
-static void inode_data_hash_func(const void *p, struct siphash *state) {
- const struct inode_data *d = p;
-
- assert(p);
+static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
+ assert(d);
siphash24_compress(&d->dev, sizeof(d->dev), state);
siphash24_compress(&d->ino, sizeof(d->ino), state);
}
-const struct hash_ops inode_data_hash_ops = {
- .hash = inode_data_hash_func,
- .compare = inode_data_compare
-};
+DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
static void event_free_inode_data(
sd_event *e,
*
* Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
* because we cannot change the mask anymore after the event source was created once, since the kernel has no
- * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and supress
+ * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
* events we don't care for client-side. */
LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
sd_event_inotify_handler_t callback,
void *userdata) {
- bool rm_inotify = false, rm_inode = false;
struct inotify_data *inotify_data = NULL;
struct inode_data *inode_data = NULL;
_cleanup_close_ int fd = -1;
- sd_event_source *s;
+ _cleanup_(source_freep) sd_event_source *s = NULL;
struct stat st;
int r;
/* Allocate an inotify object for this priority, and an inode object within it */
r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
if (r < 0)
- goto fail;
- rm_inotify = r > 0;
+ return r;
r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
- if (r < 0)
- goto fail;
- rm_inode = r > 0;
+ if (r < 0) {
+ event_free_inotify_data(e, inotify_data);
+ return r;
+ }
/* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
* the event source, until then, for which we need the original inode. */
LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
s->inotify.inode_data = inode_data;
- rm_inode = rm_inotify = false;
-
/* Actually realize the watch now */
r = inode_data_realize_watch(e, inode_data);
if (r < 0)
- goto fail;
+ return r;
(void) sd_event_source_set_description(s, path);
if (ret)
*ret = s;
+ TAKE_PTR(s);
return 0;
-
-fail:
- source_free(s);
-
- if (rm_inode)
- event_free_inode_data(e, inode_data);
-
- if (rm_inotify)
- event_free_inotify_data(e, inotify_data);
-
- return r;
-}
-
-_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
-
- if (!s)
- return NULL;
-
- assert(s->n_ref >= 1);
- s->n_ref++;
-
- return s;
}
-_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
-
+static sd_event_source* event_source_free(sd_event_source *s) {
if (!s)
return NULL;
- assert(s->n_ref >= 1);
- s->n_ref--;
-
- if (s->n_ref <= 0) {
- /* Here's a special hack: when we are called from a
- * dispatch handler we won't free the event source
- * immediately, but we will detach the fd from the
- * epoll. This way it is safe for the caller to unref
- * the event source and immediately close the fd, but
- * we still retain a valid event source object after
- * the callback. */
+ /* Here's a special hack: when we are called from a
+ * dispatch handler we won't free the event source
+ * immediately, but we will detach the fd from the
+ * epoll. This way it is safe for the caller to unref
+ * the event source and immediately close the fd, but
+ * we still retain a valid event source object after
+ * the callback. */
- if (s->dispatching) {
- if (s->type == SOURCE_IO)
- source_io_unregister(s);
+ if (s->dispatching) {
+ if (s->type == SOURCE_IO)
+ source_io_unregister(s);
- source_disconnect(s);
- } else
- source_free(s);
- }
+ source_disconnect(s);
+ } else
+ source_free(s);
return NULL;
}
+DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
+
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
assert_return(s, -EINVAL);
assert_return(!event_pid_changed(s->event), -ECHILD);
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
assert_return(s, -EINVAL);
assert_return(description, -EINVAL);
- assert_return(s->description, -ENXIO);
assert_return(!event_pid_changed(s->event), -ECHILD);
+ if (!s->description)
+ return -ENXIO;
+
*description = s->description;
return 0;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
assert_return(s, -EINVAL);
- assert_return(m, -EINVAL);
assert_return(!event_pid_changed(s->event), -ECHILD);
- *m = s->enabled;
- return 0;
+ if (m)
+ *m = s->enabled;
+ return s->enabled != SD_EVENT_OFF;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
timeout = 0;
m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
- timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
+ timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
if (m < 0) {
if (errno == EINTR) {
e->state = SD_EVENT_PENDING;
*ret = e->iteration;
return 0;
}
+
+/* Installs (or clears, if callback is NULL) a hook that source_free() invokes
+ * with the source's userdata when the event source is finally freed. */
+_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
+ assert_return(s, -EINVAL);
+
+ s->destroy_callback = callback;
+ return 0;
+}
+
+/* Retrieves the currently installed destroy callback. Returns positive if one
+ * is set, zero otherwise; ret may be NULL if the caller only cares about the
+ * boolean result. */
+_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
+ assert_return(s, -EINVAL);
+
+ if (ret)
+ *ret = s->destroy_callback;
+
+ return !!s->destroy_callback;
+}
+
+/* Returns the source's "floating" flag (see set_floating below for what
+ * flipping it does to the reference counts). */
+_public_ int sd_event_source_get_floating(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+
+ return s->floating;
+}
+
+/* Switches an event source between floating and non-floating mode. Returns 0
+ * if nothing changed, 1 if the flag was flipped, and -ESTALE if the source
+ * has already been disconnected from its event loop. */
+_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
+ assert_return(s, -EINVAL);
+
+ if (s->floating == !!b)
+ return 0;
+
+ if (!s->event) /* Already disconnected */
+ return -ESTALE;
+
+ s->floating = b;
+
+ if (b) {
+ /* Becoming floating: the source takes a reference on itself and
+ * drops the one it held on the event loop; un-floating reverses
+ * this. The pairing keeps the combined refcounts balanced. */
+ sd_event_source_ref(s);
+ sd_event_unref(s->event);
+ } else {
+ sd_event_ref(s->event);
+ sd_event_source_unref(s);
+ }
+
+ return 1;
+}