1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 Copyright 2013 Lennart Poettering
7 #include <sys/timerfd.h>
10 #include "sd-daemon.h"
14 #include "alloc-util.h"
22 #include "process-util.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "time-util.h"
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
32 typedef enum EventSourceType
{
36 SOURCE_TIME_MONOTONIC
,
37 SOURCE_TIME_REALTIME_ALARM
,
38 SOURCE_TIME_BOOTTIME_ALARM
,
46 _SOURCE_EVENT_SOURCE_TYPE_MAX
,
47 _SOURCE_EVENT_SOURCE_TYPE_INVALID
= -1
/* String names for the event source types, indexed by the SOURCE_* enumerators. The table is handed to
 * DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING() right below to generate event_source_type_to_string().
 * NOTE(review): "bootime" looks like a misspelling of "boottime" (compare "boottime-alarm"); it is a
 * runtime string, so confirm nothing relies on the current spelling before changing it. */
50 static const char* const event_source_type_table
[_SOURCE_EVENT_SOURCE_TYPE_MAX
] = {
52 [SOURCE_TIME_REALTIME
] = "realtime",
53 [SOURCE_TIME_BOOTTIME
] = "bootime",
54 [SOURCE_TIME_MONOTONIC
] = "monotonic",
55 [SOURCE_TIME_REALTIME_ALARM
] = "realtime-alarm",
56 [SOURCE_TIME_BOOTTIME_ALARM
] = "boottime-alarm",
57 [SOURCE_SIGNAL
] = "signal",
58 [SOURCE_CHILD
] = "child",
59 [SOURCE_DEFER
] = "defer",
60 [SOURCE_POST
] = "post",
61 [SOURCE_EXIT
] = "exit",
62 [SOURCE_WATCHDOG
] = "watchdog",
63 [SOURCE_INOTIFY
] = "inotify",
66 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type
, int);
68 /* All objects we use in epoll events start with this value, so that
69 * we know how to dispatch them */
70 typedef enum WakeupType
{
77 _WAKEUP_TYPE_INVALID
= -1,
80 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
84 struct sd_event_source
{
91 sd_event_handler_t prepare
;
95 EventSourceType type
:5;
102 unsigned pending_index
;
103 unsigned prepare_index
;
104 uint64_t pending_iteration
;
105 uint64_t prepare_iteration
;
107 sd_event_destroy_t destroy_callback
;
109 LIST_FIELDS(sd_event_source
, sources
);
113 sd_event_io_handler_t callback
;
121 sd_event_time_handler_t callback
;
122 usec_t next
, accuracy
;
123 unsigned earliest_index
;
124 unsigned latest_index
;
127 sd_event_signal_handler_t callback
;
128 struct signalfd_siginfo siginfo
;
132 sd_event_child_handler_t callback
;
138 sd_event_handler_t callback
;
141 sd_event_handler_t callback
;
144 sd_event_handler_t callback
;
145 unsigned prioq_index
;
148 sd_event_inotify_handler_t callback
;
150 struct inode_data
*inode_data
;
151 LIST_FIELDS(sd_event_source
, by_inode_data
);
160 /* For all clocks we maintain two priority queues each, one
161 * ordered for the earliest times the events may be
162 * dispatched, and one ordered by the latest times they must
163 * have been dispatched. The range between the top entries in
164 * the two prioqs is the time window we can freely schedule
177 /* For each priority we maintain one signal fd, so that we
178 * only have to dequeue a single event per priority at a
184 sd_event_source
*current
;
187 /* A structure listing all event sources currently watching a specific inode */
189 /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
193 /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
194 * still rearrange the priority until then. Changing the priority requires the original inode, because we need to
195 * add a watch descriptor to the right inotify for the priority, which we can only do if we have a handle to the
196 * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
197 * the sd-event object, so that it is efficient to close everything, before entering the next event loop
201 /* The inotify "watch descriptor" */
204 /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
205 * most recently been set on the watch descriptor. */
206 uint32_t combined_mask
;
208 /* All event sources subscribed to this inode */
209 LIST_HEAD(sd_event_source
, event_sources
);
211 /* The inotify object we watch this inode with */
212 struct inotify_data
*inotify_data
;
214 /* A linked list of all inode data objects with fds to close (see above) */
215 LIST_FIELDS(struct inode_data
, to_close
);
218 /* A structure encapsulating an inotify fd */
219 struct inotify_data
{
222 /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
228 Hashmap
*inodes
; /* The inode_data structures keyed by dev+ino */
229 Hashmap
*wd
; /* The inode_data structures keyed by the watch descriptor for each */
231 /* The buffer we read inotify events into */
232 union inotify_event_buffer buffer
;
233 size_t buffer_filled
; /* fill level of the buffer */
235 /* How many event sources are currently marked pending for this inotify. We won't read new events off the
236 * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
237 * the events locally if they can't be coalesced). */
240 /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
241 * to make it efficient to figure out what inotify objects to process data on next. */
242 LIST_FIELDS(struct inotify_data
, buffered
);
254 /* timerfd_create() only supports these five clocks so far. We
255 * can add support for more clocks when the kernel learns to
256 * deal with them, too. */
257 struct clock_data realtime
;
258 struct clock_data boottime
;
259 struct clock_data monotonic
;
260 struct clock_data realtime_alarm
;
261 struct clock_data boottime_alarm
;
265 sd_event_source
**signal_sources
; /* indexed by signal number */
266 Hashmap
*signal_data
; /* indexed by priority */
268 Hashmap
*child_sources
;
269 unsigned n_enabled_child_sources
;
275 Hashmap
*inotify_data
; /* indexed by priority */
277 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
278 LIST_HEAD(struct inode_data
, inode_data_to_close
);
280 /* A list of inotify objects that already have events buffered which aren't processed yet */
281 LIST_HEAD(struct inotify_data
, inotify_data_buffered
);
286 triple_timestamp timestamp
;
289 bool exit_requested
:1;
290 bool need_process_child
:1;
292 bool profile_delays
:1;
297 sd_event
**default_event_ptr
;
299 usec_t watchdog_last
, watchdog_period
;
303 LIST_HEAD(sd_event_source
, sources
);
305 usec_t last_run
, last_log
;
306 unsigned delays
[sizeof(usec_t
) * 8];
309 static thread_local sd_event
*default_event
= NULL
;
311 static void source_disconnect(sd_event_source
*s
);
312 static void event_gc_inode_data(sd_event
*e
, struct inode_data
*d
);
314 static sd_event
*event_resolve(sd_event
*e
) {
315 return e
== SD_EVENT_DEFAULT
? default_event
: e
;
/* Ordering callback for the "pending" priority queue (installed in sd_event_new() through
 * prioq_ensure_allocated(&e->pending, ...)). Both arguments are sd_event_source objects. The criteria are
 * checked in this order: enabled state, then priority, then pending_iteration. */
318 static int pending_prioq_compare(const void *a
, const void *b
) {
319 const sd_event_source
*x
= a
, *y
= b
;
324 /* Enabled ones first */
325 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
327 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
330 /* Lower priority values first */
331 if (x
->priority
< y
->priority
)
333 if (x
->priority
> y
->priority
)
336 /* Older entries first */
337 if (x
->pending_iteration
< y
->pending_iteration
)
339 if (x
->pending_iteration
> y
->pending_iteration
)
345 static int prepare_prioq_compare(const void *a
, const void *b
) {
346 const sd_event_source
*x
= a
, *y
= b
;
351 /* Enabled ones first */
352 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
354 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
357 /* Move most recently prepared ones last, so that we can stop
358 * preparing as soon as we hit one that has already been
359 * prepared in the current iteration */
360 if (x
->prepare_iteration
< y
->prepare_iteration
)
362 if (x
->prepare_iteration
> y
->prepare_iteration
)
365 /* Lower priority values first */
366 if (x
->priority
< y
->priority
)
368 if (x
->priority
> y
->priority
)
374 static int earliest_time_prioq_compare(const void *a
, const void *b
) {
375 const sd_event_source
*x
= a
, *y
= b
;
377 assert(EVENT_SOURCE_IS_TIME(x
->type
));
378 assert(x
->type
== y
->type
);
380 /* Enabled ones first */
381 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
383 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
386 /* Move the pending ones to the end */
387 if (!x
->pending
&& y
->pending
)
389 if (x
->pending
&& !y
->pending
)
393 if (x
->time
.next
< y
->time
.next
)
395 if (x
->time
.next
> y
->time
.next
)
401 static usec_t
time_event_source_latest(const sd_event_source
*s
) {
402 return usec_add(s
->time
.next
, s
->time
.accuracy
);
405 static int latest_time_prioq_compare(const void *a
, const void *b
) {
406 const sd_event_source
*x
= a
, *y
= b
;
408 assert(EVENT_SOURCE_IS_TIME(x
->type
));
409 assert(x
->type
== y
->type
);
411 /* Enabled ones first */
412 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
414 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
417 /* Move the pending ones to the end */
418 if (!x
->pending
&& y
->pending
)
420 if (x
->pending
&& !y
->pending
)
424 if (time_event_source_latest(x
) < time_event_source_latest(y
))
426 if (time_event_source_latest(x
) > time_event_source_latest(y
))
/* Ordering callback for the "exit" priority queue (installed in sd_event_add_exit() through
 * prioq_ensure_allocated(&e->exit, ...)). Both arguments must be event sources of type SOURCE_EXIT, which
 * is asserted. The criteria are checked in this order: enabled state, then priority. */
432 static int exit_prioq_compare(const void *a
, const void *b
) {
433 const sd_event_source
*x
= a
, *y
= b
;
435 assert(x
->type
== SOURCE_EXIT
);
436 assert(y
->type
== SOURCE_EXIT
);
438 /* Enabled ones first */
439 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
441 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
444 /* Lower priority values first */
445 if (x
->priority
< y
->priority
)
447 if (x
->priority
> y
->priority
)
/* Releases resources held by a clock_data object: frees its "earliest" and "latest" priority queues.
 * The object must carry the WAKEUP_CLOCK_DATA wakeup tag, which is asserted. The clock_data itself is
 * embedded in sd_event (see the calls in event_free()), so it is not freed here. */
453 static void free_clock_data(struct clock_data
*d
) {
455 assert(d
->wakeup
== WAKEUP_CLOCK_DATA
);
458 prioq_free(d
->earliest
);
459 prioq_free(d
->latest
);
462 static void event_free(sd_event
*e
) {
467 while ((s
= e
->sources
)) {
469 source_disconnect(s
);
470 sd_event_source_unref(s
);
473 assert(e
->n_sources
== 0);
475 if (e
->default_event_ptr
)
476 *(e
->default_event_ptr
) = NULL
;
478 safe_close(e
->epoll_fd
);
479 safe_close(e
->watchdog_fd
);
481 free_clock_data(&e
->realtime
);
482 free_clock_data(&e
->boottime
);
483 free_clock_data(&e
->monotonic
);
484 free_clock_data(&e
->realtime_alarm
);
485 free_clock_data(&e
->boottime_alarm
);
487 prioq_free(e
->pending
);
488 prioq_free(e
->prepare
);
491 free(e
->signal_sources
);
492 hashmap_free(e
->signal_data
);
494 hashmap_free(e
->inotify_data
);
496 hashmap_free(e
->child_sources
);
497 set_free(e
->post_sources
);
501 _public_
int sd_event_new(sd_event
** ret
) {
505 assert_return(ret
, -EINVAL
);
507 e
= new(sd_event
, 1);
515 .realtime
.wakeup
= WAKEUP_CLOCK_DATA
,
517 .realtime
.next
= USEC_INFINITY
,
518 .boottime
.wakeup
= WAKEUP_CLOCK_DATA
,
520 .boottime
.next
= USEC_INFINITY
,
521 .monotonic
.wakeup
= WAKEUP_CLOCK_DATA
,
523 .monotonic
.next
= USEC_INFINITY
,
524 .realtime_alarm
.wakeup
= WAKEUP_CLOCK_DATA
,
525 .realtime_alarm
.fd
= -1,
526 .realtime_alarm
.next
= USEC_INFINITY
,
527 .boottime_alarm
.wakeup
= WAKEUP_CLOCK_DATA
,
528 .boottime_alarm
.fd
= -1,
529 .boottime_alarm
.next
= USEC_INFINITY
,
530 .perturb
= USEC_INFINITY
,
531 .original_pid
= getpid_cached(),
534 r
= prioq_ensure_allocated(&e
->pending
, pending_prioq_compare
);
538 e
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
539 if (e
->epoll_fd
< 0) {
544 e
->epoll_fd
= fd_move_above_stdio(e
->epoll_fd
);
546 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
547 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
548 e
->profile_delays
= true;
559 _public_ sd_event
* sd_event_ref(sd_event
*e
) {
564 assert(e
->n_ref
>= 1);
570 _public_ sd_event
* sd_event_unref(sd_event
*e
) {
575 assert(e
->n_ref
>= 1);
/* Returns true if the calling process' PID (getpid_cached()) differs from e->original_pid, i.e. the PID
 * recorded when the event loop was set up in sd_event_new(). The public entry points use this to refuse
 * operation with -ECHILD. */
584 static bool event_pid_changed(sd_event
*e
) {
587 /* We don't support people creating an event loop and keeping
588 * it around over a fork(). Let's complain. */
590 return e
->original_pid
!= getpid_cached();
593 static void source_io_unregister(sd_event_source
*s
) {
597 assert(s
->type
== SOURCE_IO
);
599 if (event_pid_changed(s
->event
))
602 if (!s
->io
.registered
)
605 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, s
->io
.fd
, NULL
);
607 log_debug_errno(errno
, "Failed to remove source %s (type %s) from epoll: %m",
608 strna(s
->description
), event_source_type_to_string(s
->type
));
610 s
->io
.registered
= false;
613 static int source_io_register(
618 struct epoll_event ev
;
622 assert(s
->type
== SOURCE_IO
);
623 assert(enabled
!= SD_EVENT_OFF
);
625 ev
= (struct epoll_event
) {
626 .events
= events
| (enabled
== SD_EVENT_ONESHOT
? EPOLLONESHOT
: 0),
630 if (s
->io
.registered
)
631 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_MOD
, s
->io
.fd
, &ev
);
633 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_ADD
, s
->io
.fd
, &ev
);
637 s
->io
.registered
= true;
/* Maps a time event source type (SOURCE_TIME_*) to the corresponding CLOCK_* id. The trailing
 * return of (clockid_t) -1 is presumably the fallback for types that are not time sources. */
642 static clockid_t
event_source_type_to_clock(EventSourceType t
) {
646 case SOURCE_TIME_REALTIME
:
647 return CLOCK_REALTIME
;
649 case SOURCE_TIME_BOOTTIME
:
650 return CLOCK_BOOTTIME
;
652 case SOURCE_TIME_MONOTONIC
:
653 return CLOCK_MONOTONIC
;
655 case SOURCE_TIME_REALTIME_ALARM
:
656 return CLOCK_REALTIME_ALARM
;
658 case SOURCE_TIME_BOOTTIME_ALARM
:
659 return CLOCK_BOOTTIME_ALARM
;
662 return (clockid_t
) -1;
/* Inverse of event_source_type_to_clock(): maps a CLOCK_* id to the matching SOURCE_TIME_* event source
 * type. The trailing return of _SOURCE_EVENT_SOURCE_TYPE_INVALID is presumably what callers see for
 * clocks sd-event does not support (sd_event_add_time() uses this function for exactly that check). */
666 static EventSourceType
clock_to_event_source_type(clockid_t clock
) {
671 return SOURCE_TIME_REALTIME
;
674 return SOURCE_TIME_BOOTTIME
;
676 case CLOCK_MONOTONIC
:
677 return SOURCE_TIME_MONOTONIC
;
679 case CLOCK_REALTIME_ALARM
:
680 return SOURCE_TIME_REALTIME_ALARM
;
682 case CLOCK_BOOTTIME_ALARM
:
683 return SOURCE_TIME_BOOTTIME_ALARM
;
686 return _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
/* Returns a pointer to the clock_data member of e that belongs to time source type t, e.g.
 * &e->monotonic for SOURCE_TIME_MONOTONIC, &e->realtime_alarm for SOURCE_TIME_REALTIME_ALARM and
 * &e->boottime_alarm for SOURCE_TIME_BOOTTIME_ALARM. SOURCE_TIME_REALTIME and SOURCE_TIME_BOOTTIME
 * presumably map to &e->realtime and &e->boottime in the same way. */
690 static struct clock_data
* event_get_clock_data(sd_event
*e
, EventSourceType t
) {
695 case SOURCE_TIME_REALTIME
:
698 case SOURCE_TIME_BOOTTIME
:
701 case SOURCE_TIME_MONOTONIC
:
702 return &e
->monotonic
;
704 case SOURCE_TIME_REALTIME_ALARM
:
705 return &e
->realtime_alarm
;
707 case SOURCE_TIME_BOOTTIME_ALARM
:
708 return &e
->boottime_alarm
;
715 static int event_make_signal_data(
718 struct signal_data
**ret
) {
720 struct epoll_event ev
;
721 struct signal_data
*d
;
729 if (event_pid_changed(e
))
732 if (e
->signal_sources
&& e
->signal_sources
[sig
])
733 priority
= e
->signal_sources
[sig
]->priority
;
735 priority
= SD_EVENT_PRIORITY_NORMAL
;
737 d
= hashmap_get(e
->signal_data
, &priority
);
739 if (sigismember(&d
->sigset
, sig
) > 0) {
745 r
= hashmap_ensure_allocated(&e
->signal_data
, &uint64_hash_ops
);
749 d
= new(struct signal_data
, 1);
753 *d
= (struct signal_data
) {
754 .wakeup
= WAKEUP_SIGNAL_DATA
,
756 .priority
= priority
,
759 r
= hashmap_put(e
->signal_data
, &d
->priority
, d
);
769 assert_se(sigaddset(&ss_copy
, sig
) >= 0);
771 r
= signalfd(d
->fd
, &ss_copy
, SFD_NONBLOCK
|SFD_CLOEXEC
);
785 d
->fd
= fd_move_above_stdio(r
);
787 ev
= (struct epoll_event
) {
792 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, d
->fd
, &ev
);
805 d
->fd
= safe_close(d
->fd
);
806 hashmap_remove(e
->signal_data
, &d
->priority
);
/* Drops signal sig from the signal set of the per-priority signal_data object d. The visible steps:
 * check membership with sigismember(), delete the signal with sigdelset(), remove d from
 * e->signal_data once the set is empty, and re-apply the reduced set to d->fd via signalfd(). A
 * signalfd() failure is only logged at debug level and otherwise ignored. */
813 static void event_unmask_signal_data(sd_event
*e
, struct signal_data
*d
, int sig
) {
817 /* Turns off the specified signal in the signal data
818 * object. If the signal mask of the object becomes empty that
821 if (sigismember(&d
->sigset
, sig
) == 0)
824 assert_se(sigdelset(&d
->sigset
, sig
) >= 0);
826 if (sigisemptyset(&d
->sigset
)) {
828 /* If the mask is all-zero we can get rid of the structure */
829 hashmap_remove(e
->signal_data
, &d
->priority
);
837 if (signalfd(d
->fd
, &d
->sigset
, SFD_NONBLOCK
|SFD_CLOEXEC
) < 0)
838 log_debug_errno(errno
, "Failed to unset signal bit, ignoring: %m");
841 static void event_gc_signal_data(sd_event
*e
, const int64_t *priority
, int sig
) {
842 struct signal_data
*d
;
843 static const int64_t zero_priority
= 0;
847 /* Rechecks if the specified signal is still something we are
848 * interested in. If not, we'll unmask it, and possibly drop
849 * the signalfd for it. */
851 if (sig
== SIGCHLD
&&
852 e
->n_enabled_child_sources
> 0)
855 if (e
->signal_sources
&&
856 e
->signal_sources
[sig
] &&
857 e
->signal_sources
[sig
]->enabled
!= SD_EVENT_OFF
)
861 * The specified signal might be enabled in three different queues:
863 * 1) the one that belongs to the priority passed (if it is non-NULL)
864 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
865 * 3) the 0 priority (to cover the SIGCHLD case)
867 * Hence, let's remove it from all three here.
871 d
= hashmap_get(e
->signal_data
, priority
);
873 event_unmask_signal_data(e
, d
, sig
);
876 if (e
->signal_sources
&& e
->signal_sources
[sig
]) {
877 d
= hashmap_get(e
->signal_data
, &e
->signal_sources
[sig
]->priority
);
879 event_unmask_signal_data(e
, d
, sig
);
882 d
= hashmap_get(e
->signal_data
, &zero_priority
);
884 event_unmask_signal_data(e
, d
, sig
);
887 static void source_disconnect(sd_event_source
*s
) {
895 assert(s
->event
->n_sources
> 0);
901 source_io_unregister(s
);
905 case SOURCE_TIME_REALTIME
:
906 case SOURCE_TIME_BOOTTIME
:
907 case SOURCE_TIME_MONOTONIC
:
908 case SOURCE_TIME_REALTIME_ALARM
:
909 case SOURCE_TIME_BOOTTIME_ALARM
: {
910 struct clock_data
*d
;
912 d
= event_get_clock_data(s
->event
, s
->type
);
915 prioq_remove(d
->earliest
, s
, &s
->time
.earliest_index
);
916 prioq_remove(d
->latest
, s
, &s
->time
.latest_index
);
917 d
->needs_rearm
= true;
922 if (s
->signal
.sig
> 0) {
924 if (s
->event
->signal_sources
)
925 s
->event
->signal_sources
[s
->signal
.sig
] = NULL
;
927 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
933 if (s
->child
.pid
> 0) {
934 if (s
->enabled
!= SD_EVENT_OFF
) {
935 assert(s
->event
->n_enabled_child_sources
> 0);
936 s
->event
->n_enabled_child_sources
--;
939 (void) hashmap_remove(s
->event
->child_sources
, PID_TO_PTR(s
->child
.pid
));
940 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
950 set_remove(s
->event
->post_sources
, s
);
954 prioq_remove(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
957 case SOURCE_INOTIFY
: {
958 struct inode_data
*inode_data
;
960 inode_data
= s
->inotify
.inode_data
;
962 struct inotify_data
*inotify_data
;
963 assert_se(inotify_data
= inode_data
->inotify_data
);
965 /* Detach this event source from the inode object */
966 LIST_REMOVE(inotify
.by_inode_data
, inode_data
->event_sources
, s
);
967 s
->inotify
.inode_data
= NULL
;
970 assert(inotify_data
->n_pending
> 0);
971 inotify_data
->n_pending
--;
974 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
975 * continues to be watched. That's because inotify doesn't really have an API for that: we
976 * can only change watch masks with access to the original inode either by fd or by path. But
977 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
978 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
979 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
980 * there), but given the need for open_by_handle_at() which is privileged and not universally
981 * available this would be quite an incomplete solution. Hence we go the other way, leave the
982 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
983 * anymore after reception. Yes, this sucks, but … Linux … */
985 /* Maybe release the inode data (and its inotify) */
986 event_gc_inode_data(s
->event
, inode_data
);
993 assert_not_reached("Wut? I shouldn't exist.");
997 prioq_remove(s
->event
->pending
, s
, &s
->pending_index
);
1000 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
1004 s
->type
= _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
1006 LIST_REMOVE(sources
, event
->sources
, s
);
1010 sd_event_unref(event
);
1013 static void source_free(sd_event_source
*s
) {
1016 source_disconnect(s
);
1018 if (s
->type
== SOURCE_IO
&& s
->io
.owned
)
1019 s
->io
.fd
= safe_close(s
->io
.fd
);
1021 if (s
->destroy_callback
)
1022 s
->destroy_callback(s
->userdata
);
1024 free(s
->description
);
1028 static int source_set_pending(sd_event_source
*s
, bool b
) {
1032 assert(s
->type
!= SOURCE_EXIT
);
1034 if (s
->pending
== b
)
1040 s
->pending_iteration
= s
->event
->iteration
;
1042 r
= prioq_put(s
->event
->pending
, s
, &s
->pending_index
);
1048 assert_se(prioq_remove(s
->event
->pending
, s
, &s
->pending_index
));
1050 if (EVENT_SOURCE_IS_TIME(s
->type
)) {
1051 struct clock_data
*d
;
1053 d
= event_get_clock_data(s
->event
, s
->type
);
1056 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1057 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1058 d
->needs_rearm
= true;
1061 if (s
->type
== SOURCE_SIGNAL
&& !b
) {
1062 struct signal_data
*d
;
1064 d
= hashmap_get(s
->event
->signal_data
, &s
->priority
);
1065 if (d
&& d
->current
== s
)
1069 if (s
->type
== SOURCE_INOTIFY
) {
1071 assert(s
->inotify
.inode_data
);
1072 assert(s
->inotify
.inode_data
->inotify_data
);
1075 s
->inotify
.inode_data
->inotify_data
->n_pending
++;
1077 assert(s
->inotify
.inode_data
->inotify_data
->n_pending
> 0);
1078 s
->inotify
.inode_data
->inotify_data
->n_pending
--;
1085 static sd_event_source
*source_new(sd_event
*e
, bool floating
, EventSourceType type
) {
1090 s
= new(sd_event_source
, 1);
1094 *s
= (struct sd_event_source
) {
1097 .floating
= floating
,
1099 .pending_index
= PRIOQ_IDX_NULL
,
1100 .prepare_index
= PRIOQ_IDX_NULL
,
1106 LIST_PREPEND(sources
, e
->sources
, s
);
1112 _public_
int sd_event_add_io(
1114 sd_event_source
**ret
,
1117 sd_event_io_handler_t callback
,
1123 assert_return(e
, -EINVAL
);
1124 assert_return(e
= event_resolve(e
), -ENOPKG
);
1125 assert_return(fd
>= 0, -EBADF
);
1126 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
1127 assert_return(callback
, -EINVAL
);
1128 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1129 assert_return(!event_pid_changed(e
), -ECHILD
);
1131 s
= source_new(e
, !ret
, SOURCE_IO
);
1135 s
->wakeup
= WAKEUP_EVENT_SOURCE
;
1137 s
->io
.events
= events
;
1138 s
->io
.callback
= callback
;
1139 s
->userdata
= userdata
;
1140 s
->enabled
= SD_EVENT_ON
;
1142 r
= source_io_register(s
, s
->enabled
, events
);
1154 static void initialize_perturb(sd_event
*e
) {
1155 sd_id128_t bootid
= {};
1157 /* When we sleep for longer, we try to realign the wakeup to
1158 the same time within each minute/second/250ms, so that
1159 events all across the system can be coalesced into a single
1160 CPU wakeup. However, let's take some system-specific
1161 randomness for this value, so that in a network of systems
1162 with synced clocks timer events are distributed a
1163 bit. Here, we calculate a perturbation usec offset from the
1166 if (_likely_(e
->perturb
!= USEC_INFINITY
))
1169 if (sd_id128_get_boot(&bootid
) >= 0)
1170 e
->perturb
= (bootid
.qwords
[0] ^ bootid
.qwords
[1]) % USEC_PER_MINUTE
;
1173 static int event_setup_timer_fd(
1175 struct clock_data
*d
,
1178 struct epoll_event ev
;
1184 if (_likely_(d
->fd
>= 0))
1187 fd
= timerfd_create(clock
, TFD_NONBLOCK
|TFD_CLOEXEC
);
1191 fd
= fd_move_above_stdio(fd
);
1193 ev
= (struct epoll_event
) {
1198 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
);
/* Time event handler that asks the event loop owning s to exit: calls sd_event_exit() with the exit
 * code encoded in the userdata pointer (decoded via PTR_TO_INT). sd_event_add_time() installs this as
 * the callback, presumably when the caller did not supply one. The usec argument is unused. */
1208 static int time_exit_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
1211 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1214 _public_
int sd_event_add_time(
1216 sd_event_source
**ret
,
1220 sd_event_time_handler_t callback
,
1223 EventSourceType type
;
1225 struct clock_data
*d
;
1228 assert_return(e
, -EINVAL
);
1229 assert_return(e
= event_resolve(e
), -ENOPKG
);
1230 assert_return(accuracy
!= (uint64_t) -1, -EINVAL
);
1231 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1232 assert_return(!event_pid_changed(e
), -ECHILD
);
1234 if (!clock_supported(clock
)) /* Checks whether the kernel supports the clock */
1237 type
= clock_to_event_source_type(clock
); /* checks whether sd-event supports this clock */
1242 callback
= time_exit_callback
;
1244 d
= event_get_clock_data(e
, type
);
1247 r
= prioq_ensure_allocated(&d
->earliest
, earliest_time_prioq_compare
);
1251 r
= prioq_ensure_allocated(&d
->latest
, latest_time_prioq_compare
);
1256 r
= event_setup_timer_fd(e
, d
, clock
);
1261 s
= source_new(e
, !ret
, type
);
1265 s
->time
.next
= usec
;
1266 s
->time
.accuracy
= accuracy
== 0 ? DEFAULT_ACCURACY_USEC
: accuracy
;
1267 s
->time
.callback
= callback
;
1268 s
->time
.earliest_index
= s
->time
.latest_index
= PRIOQ_IDX_NULL
;
1269 s
->userdata
= userdata
;
1270 s
->enabled
= SD_EVENT_ONESHOT
;
1272 d
->needs_rearm
= true;
1274 r
= prioq_put(d
->earliest
, s
, &s
->time
.earliest_index
);
1278 r
= prioq_put(d
->latest
, s
, &s
->time
.latest_index
);
/* Signal event handler that asks the event loop owning s to exit: calls sd_event_exit() with the exit
 * code encoded in the userdata pointer (decoded via PTR_TO_INT). sd_event_add_signal() installs this as
 * the callback, presumably when the caller did not supply one. The si argument is unused. */
1292 static int signal_exit_callback(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1295 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1298 _public_
int sd_event_add_signal(
1300 sd_event_source
**ret
,
1302 sd_event_signal_handler_t callback
,
1306 struct signal_data
*d
;
1310 assert_return(e
, -EINVAL
);
1311 assert_return(e
= event_resolve(e
), -ENOPKG
);
1312 assert_return(SIGNAL_VALID(sig
), -EINVAL
);
1313 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1314 assert_return(!event_pid_changed(e
), -ECHILD
);
1317 callback
= signal_exit_callback
;
1319 r
= pthread_sigmask(SIG_SETMASK
, NULL
, &ss
);
1323 if (!sigismember(&ss
, sig
))
1326 if (!e
->signal_sources
) {
1327 e
->signal_sources
= new0(sd_event_source
*, _NSIG
);
1328 if (!e
->signal_sources
)
1330 } else if (e
->signal_sources
[sig
])
1333 s
= source_new(e
, !ret
, SOURCE_SIGNAL
);
1337 s
->signal
.sig
= sig
;
1338 s
->signal
.callback
= callback
;
1339 s
->userdata
= userdata
;
1340 s
->enabled
= SD_EVENT_ON
;
1342 e
->signal_sources
[sig
] = s
;
1344 r
= event_make_signal_data(e
, sig
, &d
);
1350 /* Use the signal name as description for the event source by default */
1351 (void) sd_event_source_set_description(s
, signal_to_string(sig
));
1359 _public_
int sd_event_add_child(
1361 sd_event_source
**ret
,
1364 sd_event_child_handler_t callback
,
1370 assert_return(e
, -EINVAL
);
1371 assert_return(e
= event_resolve(e
), -ENOPKG
);
1372 assert_return(pid
> 1, -EINVAL
);
1373 assert_return(!(options
& ~(WEXITED
|WSTOPPED
|WCONTINUED
)), -EINVAL
);
1374 assert_return(options
!= 0, -EINVAL
);
1375 assert_return(callback
, -EINVAL
);
1376 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1377 assert_return(!event_pid_changed(e
), -ECHILD
);
1379 r
= hashmap_ensure_allocated(&e
->child_sources
, NULL
);
1383 if (hashmap_contains(e
->child_sources
, PID_TO_PTR(pid
)))
1386 s
= source_new(e
, !ret
, SOURCE_CHILD
);
1391 s
->child
.options
= options
;
1392 s
->child
.callback
= callback
;
1393 s
->userdata
= userdata
;
1394 s
->enabled
= SD_EVENT_ONESHOT
;
1396 r
= hashmap_put(e
->child_sources
, PID_TO_PTR(pid
), s
);
1402 e
->n_enabled_child_sources
++;
1404 r
= event_make_signal_data(e
, SIGCHLD
, NULL
);
1406 e
->n_enabled_child_sources
--;
1411 e
->need_process_child
= true;
1419 _public_
int sd_event_add_defer(
1421 sd_event_source
**ret
,
1422 sd_event_handler_t callback
,
1428 assert_return(e
, -EINVAL
);
1429 assert_return(e
= event_resolve(e
), -ENOPKG
);
1430 assert_return(callback
, -EINVAL
);
1431 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1432 assert_return(!event_pid_changed(e
), -ECHILD
);
1434 s
= source_new(e
, !ret
, SOURCE_DEFER
);
1438 s
->defer
.callback
= callback
;
1439 s
->userdata
= userdata
;
1440 s
->enabled
= SD_EVENT_ONESHOT
;
1442 r
= source_set_pending(s
, true);
1454 _public_
int sd_event_add_post(
1456 sd_event_source
**ret
,
1457 sd_event_handler_t callback
,
1463 assert_return(e
, -EINVAL
);
1464 assert_return(e
= event_resolve(e
), -ENOPKG
);
1465 assert_return(callback
, -EINVAL
);
1466 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1467 assert_return(!event_pid_changed(e
), -ECHILD
);
1469 r
= set_ensure_allocated(&e
->post_sources
, NULL
);
1473 s
= source_new(e
, !ret
, SOURCE_POST
);
1477 s
->post
.callback
= callback
;
1478 s
->userdata
= userdata
;
1479 s
->enabled
= SD_EVENT_ON
;
1481 r
= set_put(e
->post_sources
, s
);
1493 _public_
int sd_event_add_exit(
1495 sd_event_source
**ret
,
1496 sd_event_handler_t callback
,
1502 assert_return(e
, -EINVAL
);
1503 assert_return(e
= event_resolve(e
), -ENOPKG
);
1504 assert_return(callback
, -EINVAL
);
1505 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1506 assert_return(!event_pid_changed(e
), -ECHILD
);
1508 r
= prioq_ensure_allocated(&e
->exit
, exit_prioq_compare
);
1512 s
= source_new(e
, !ret
, SOURCE_EXIT
);
1516 s
->exit
.callback
= callback
;
1517 s
->userdata
= userdata
;
1518 s
->exit
.prioq_index
= PRIOQ_IDX_NULL
;
1519 s
->enabled
= SD_EVENT_ONESHOT
;
1521 r
= prioq_put(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1533 static void event_free_inotify_data(sd_event
*e
, struct inotify_data
*d
) {
1539 assert(hashmap_isempty(d
->inodes
));
1540 assert(hashmap_isempty(d
->wd
));
1542 if (d
->buffer_filled
> 0)
1543 LIST_REMOVE(buffered
, e
->inotify_data_buffered
, d
);
1545 hashmap_free(d
->inodes
);
1546 hashmap_free(d
->wd
);
1548 assert_se(hashmap_remove(e
->inotify_data
, &d
->priority
) == d
);
1551 if (epoll_ctl(e
->epoll_fd
, EPOLL_CTL_DEL
, d
->fd
, NULL
) < 0)
1552 log_debug_errno(errno
, "Failed to remove inotify fd from epoll, ignoring: %m");
1559 static int event_make_inotify_data(
1562 struct inotify_data
**ret
) {
1564 _cleanup_close_
int fd
= -1;
1565 struct inotify_data
*d
;
1566 struct epoll_event ev
;
1571 d
= hashmap_get(e
->inotify_data
, &priority
);
1578 fd
= inotify_init1(IN_NONBLOCK
|O_CLOEXEC
);
1582 fd
= fd_move_above_stdio(fd
);
1584 r
= hashmap_ensure_allocated(&e
->inotify_data
, &uint64_hash_ops
);
1588 d
= new(struct inotify_data
, 1);
1592 *d
= (struct inotify_data
) {
1593 .wakeup
= WAKEUP_INOTIFY_DATA
,
1595 .priority
= priority
,
1598 r
= hashmap_put(e
->inotify_data
, &d
->priority
, d
);
1600 d
->fd
= safe_close(d
->fd
);
1605 ev
= (struct epoll_event
) {
1610 if (epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, d
->fd
, &ev
) < 0) {
1612 d
->fd
= safe_close(d
->fd
); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1613 * remove the fd from the epoll first, which we don't want as we couldn't
1614 * add it in the first place. */
1615 event_free_inotify_data(e
, d
);
1625 static int inode_data_compare(const void *a
, const void *b
) {
1626 const struct inode_data
*x
= a
, *y
= b
;
1631 if (x
->dev
< y
->dev
)
1633 if (x
->dev
> y
->dev
)
1636 if (x
->ino
< y
->ino
)
1638 if (x
->ino
> y
->ino
)
1644 static void inode_data_hash_func(const void *p
, struct siphash
*state
) {
1645 const struct inode_data
*d
= p
;
1649 siphash24_compress(&d
->dev
, sizeof(d
->dev
), state
);
1650 siphash24_compress(&d
->ino
, sizeof(d
->ino
), state
);
1653 const struct hash_ops inode_data_hash_ops
= {
1654 .hash
= inode_data_hash_func
,
1655 .compare
= inode_data_compare
1658 static void event_free_inode_data(
1660 struct inode_data
*d
) {
1667 assert(!d
->event_sources
);
1670 LIST_REMOVE(to_close
, e
->inode_data_to_close
, d
);
1674 if (d
->inotify_data
) {
1677 if (d
->inotify_data
->fd
>= 0) {
1678 /* So here's a problem. At the time this runs the watch descriptor might already be
1679 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1680 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1681 * likely case to happen. */
1683 if (inotify_rm_watch(d
->inotify_data
->fd
, d
->wd
) < 0 && errno
!= EINVAL
)
1684 log_debug_errno(errno
, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d
->wd
);
1687 assert_se(hashmap_remove(d
->inotify_data
->wd
, INT_TO_PTR(d
->wd
)) == d
);
1690 assert_se(hashmap_remove(d
->inotify_data
->inodes
, d
) == d
);
1696 static void event_gc_inode_data(
1698 struct inode_data
*d
) {
1700 struct inotify_data
*inotify_data
;
1707 if (d
->event_sources
)
1710 inotify_data
= d
->inotify_data
;
1711 event_free_inode_data(e
, d
);
1713 if (inotify_data
&& hashmap_isempty(inotify_data
->inodes
))
1714 event_free_inotify_data(e
, inotify_data
);
1717 static int event_make_inode_data(
1719 struct inotify_data
*inotify_data
,
1722 struct inode_data
**ret
) {
1724 struct inode_data
*d
, key
;
1728 assert(inotify_data
);
1730 key
= (struct inode_data
) {
1735 d
= hashmap_get(inotify_data
->inodes
, &key
);
1743 r
= hashmap_ensure_allocated(&inotify_data
->inodes
, &inode_data_hash_ops
);
1747 d
= new(struct inode_data
, 1);
1751 *d
= (struct inode_data
) {
1756 .inotify_data
= inotify_data
,
1759 r
= hashmap_put(inotify_data
->inodes
, d
, d
);
1771 static uint32_t inode_data_determine_mask(struct inode_data
*d
) {
1772 bool excl_unlink
= true;
1773 uint32_t combined
= 0;
1778 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1779 * the IN_EXCL_UNLINK flag is ANDed instead.
1781 * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
1782 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1783 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1784 * events we don't care for client-side. */
1786 LIST_FOREACH(inotify
.by_inode_data
, s
, d
->event_sources
) {
1788 if ((s
->inotify
.mask
& IN_EXCL_UNLINK
) == 0)
1789 excl_unlink
= false;
1791 combined
|= s
->inotify
.mask
;
1794 return (combined
& ~(IN_ONESHOT
|IN_DONT_FOLLOW
|IN_ONLYDIR
|IN_EXCL_UNLINK
)) | (excl_unlink
? IN_EXCL_UNLINK
: 0);
1797 static int inode_data_realize_watch(sd_event
*e
, struct inode_data
*d
) {
1798 uint32_t combined_mask
;
1804 combined_mask
= inode_data_determine_mask(d
);
1806 if (d
->wd
>= 0 && combined_mask
== d
->combined_mask
)
1809 r
= hashmap_ensure_allocated(&d
->inotify_data
->wd
, NULL
);
1813 wd
= inotify_add_watch_fd(d
->inotify_data
->fd
, d
->fd
, combined_mask
);
1818 r
= hashmap_put(d
->inotify_data
->wd
, INT_TO_PTR(wd
), d
);
1820 (void) inotify_rm_watch(d
->inotify_data
->fd
, wd
);
1826 } else if (d
->wd
!= wd
) {
1828 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1829 (void) inotify_rm_watch(d
->fd
, wd
);
1833 d
->combined_mask
= combined_mask
;
1837 _public_
int sd_event_add_inotify(
1839 sd_event_source
**ret
,
1842 sd_event_inotify_handler_t callback
,
1845 bool rm_inotify
= false, rm_inode
= false;
1846 struct inotify_data
*inotify_data
= NULL
;
1847 struct inode_data
*inode_data
= NULL
;
1848 _cleanup_close_
int fd
= -1;
1853 assert_return(e
, -EINVAL
);
1854 assert_return(e
= event_resolve(e
), -ENOPKG
);
1855 assert_return(path
, -EINVAL
);
1856 assert_return(callback
, -EINVAL
);
1857 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1858 assert_return(!event_pid_changed(e
), -ECHILD
);
1860 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1861 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1862 * the user can't use them for us. */
1863 if (mask
& IN_MASK_ADD
)
1866 fd
= open(path
, O_PATH
|O_CLOEXEC
|
1867 (mask
& IN_ONLYDIR
? O_DIRECTORY
: 0)|
1868 (mask
& IN_DONT_FOLLOW
? O_NOFOLLOW
: 0));
1872 if (fstat(fd
, &st
) < 0)
1875 s
= source_new(e
, !ret
, SOURCE_INOTIFY
);
1879 s
->enabled
= mask
& IN_ONESHOT
? SD_EVENT_ONESHOT
: SD_EVENT_ON
;
1880 s
->inotify
.mask
= mask
;
1881 s
->inotify
.callback
= callback
;
1882 s
->userdata
= userdata
;
1884 /* Allocate an inotify object for this priority, and an inode object within it */
1885 r
= event_make_inotify_data(e
, SD_EVENT_PRIORITY_NORMAL
, &inotify_data
);
1890 r
= event_make_inode_data(e
, inotify_data
, st
.st_dev
, st
.st_ino
, &inode_data
);
1895 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1896 * the event source, until then, for which we need the original inode. */
1897 if (inode_data
->fd
< 0) {
1898 inode_data
->fd
= TAKE_FD(fd
);
1899 LIST_PREPEND(to_close
, e
->inode_data_to_close
, inode_data
);
1902 /* Link our event source to the inode data object */
1903 LIST_PREPEND(inotify
.by_inode_data
, inode_data
->event_sources
, s
);
1904 s
->inotify
.inode_data
= inode_data
;
1906 rm_inode
= rm_inotify
= false;
1908 /* Actually realize the watch now */
1909 r
= inode_data_realize_watch(e
, inode_data
);
1913 (void) sd_event_source_set_description(s
, path
);
1924 event_free_inode_data(e
, inode_data
);
1927 event_free_inotify_data(e
, inotify_data
);
1932 _public_ sd_event_source
* sd_event_source_ref(sd_event_source
*s
) {
1937 assert(s
->n_ref
>= 1);
1943 _public_ sd_event_source
* sd_event_source_unref(sd_event_source
*s
) {
1948 assert(s
->n_ref
>= 1);
1951 if (s
->n_ref
<= 0) {
1952 /* Here's a special hack: when we are called from a
1953 * dispatch handler we won't free the event source
1954 * immediately, but we will detach the fd from the
1955 * epoll. This way it is safe for the caller to unref
1956 * the event source and immediately close the fd, but
1957 * we still retain a valid event source object after
1960 if (s
->dispatching
) {
1961 if (s
->type
== SOURCE_IO
)
1962 source_io_unregister(s
);
1964 source_disconnect(s
);
1972 _public_
int sd_event_source_set_description(sd_event_source
*s
, const char *description
) {
1973 assert_return(s
, -EINVAL
);
1974 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1976 return free_and_strdup(&s
->description
, description
);
1979 _public_
int sd_event_source_get_description(sd_event_source
*s
, const char **description
) {
1980 assert_return(s
, -EINVAL
);
1981 assert_return(description
, -EINVAL
);
1982 assert_return(s
->description
, -ENXIO
);
1983 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1985 *description
= s
->description
;
1989 _public_ sd_event
*sd_event_source_get_event(sd_event_source
*s
) {
1990 assert_return(s
, NULL
);
1995 _public_
int sd_event_source_get_pending(sd_event_source
*s
) {
1996 assert_return(s
, -EINVAL
);
1997 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1998 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1999 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2004 _public_
int sd_event_source_get_io_fd(sd_event_source
*s
) {
2005 assert_return(s
, -EINVAL
);
2006 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
2007 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2012 _public_
int sd_event_source_set_io_fd(sd_event_source
*s
, int fd
) {
2015 assert_return(s
, -EINVAL
);
2016 assert_return(fd
>= 0, -EBADF
);
2017 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
2018 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2023 if (s
->enabled
== SD_EVENT_OFF
) {
2025 s
->io
.registered
= false;
2029 saved_fd
= s
->io
.fd
;
2030 assert(s
->io
.registered
);
2033 s
->io
.registered
= false;
2035 r
= source_io_register(s
, s
->enabled
, s
->io
.events
);
2037 s
->io
.fd
= saved_fd
;
2038 s
->io
.registered
= true;
2042 epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, saved_fd
, NULL
);
2048 _public_
int sd_event_source_get_io_fd_own(sd_event_source
*s
) {
2049 assert_return(s
, -EINVAL
);
2050 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
2055 _public_
int sd_event_source_set_io_fd_own(sd_event_source
*s
, int own
) {
2056 assert_return(s
, -EINVAL
);
2057 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
2063 _public_
int sd_event_source_get_io_events(sd_event_source
*s
, uint32_t* events
) {
2064 assert_return(s
, -EINVAL
);
2065 assert_return(events
, -EINVAL
);
2066 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
2067 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2069 *events
= s
->io
.events
;
2073 _public_
int sd_event_source_set_io_events(sd_event_source
*s
, uint32_t events
) {
2076 assert_return(s
, -EINVAL
);
2077 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
2078 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
2079 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2080 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2082 /* edge-triggered updates are never skipped, so we can reset edges */
2083 if (s
->io
.events
== events
&& !(events
& EPOLLET
))
2086 r
= source_set_pending(s
, false);
2090 if (s
->enabled
!= SD_EVENT_OFF
) {
2091 r
= source_io_register(s
, s
->enabled
, events
);
2096 s
->io
.events
= events
;
2101 _public_
int sd_event_source_get_io_revents(sd_event_source
*s
, uint32_t* revents
) {
2102 assert_return(s
, -EINVAL
);
2103 assert_return(revents
, -EINVAL
);
2104 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
2105 assert_return(s
->pending
, -ENODATA
);
2106 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2108 *revents
= s
->io
.revents
;
2112 _public_
int sd_event_source_get_signal(sd_event_source
*s
) {
2113 assert_return(s
, -EINVAL
);
2114 assert_return(s
->type
== SOURCE_SIGNAL
, -EDOM
);
2115 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2117 return s
->signal
.sig
;
2120 _public_
int sd_event_source_get_priority(sd_event_source
*s
, int64_t *priority
) {
2121 assert_return(s
, -EINVAL
);
2122 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2124 *priority
= s
->priority
;
2128 _public_
int sd_event_source_set_priority(sd_event_source
*s
, int64_t priority
) {
2129 bool rm_inotify
= false, rm_inode
= false;
2130 struct inotify_data
*new_inotify_data
= NULL
;
2131 struct inode_data
*new_inode_data
= NULL
;
2134 assert_return(s
, -EINVAL
);
2135 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2136 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2138 if (s
->priority
== priority
)
2141 if (s
->type
== SOURCE_INOTIFY
) {
2142 struct inode_data
*old_inode_data
;
2144 assert(s
->inotify
.inode_data
);
2145 old_inode_data
= s
->inotify
.inode_data
;
2147 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2148 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2149 * events we allow priority changes only until the first following iteration. */
2150 if (old_inode_data
->fd
< 0)
2153 r
= event_make_inotify_data(s
->event
, priority
, &new_inotify_data
);
2158 r
= event_make_inode_data(s
->event
, new_inotify_data
, old_inode_data
->dev
, old_inode_data
->ino
, &new_inode_data
);
2163 if (new_inode_data
->fd
< 0) {
2164 /* Duplicate the fd for the new inode object if we don't have any yet */
2165 new_inode_data
->fd
= fcntl(old_inode_data
->fd
, F_DUPFD_CLOEXEC
, 3);
2166 if (new_inode_data
->fd
< 0) {
2171 LIST_PREPEND(to_close
, s
->event
->inode_data_to_close
, new_inode_data
);
2174 /* Move the event source to the new inode data structure */
2175 LIST_REMOVE(inotify
.by_inode_data
, old_inode_data
->event_sources
, s
);
2176 LIST_PREPEND(inotify
.by_inode_data
, new_inode_data
->event_sources
, s
);
2177 s
->inotify
.inode_data
= new_inode_data
;
2179 /* Now create the new watch */
2180 r
= inode_data_realize_watch(s
->event
, new_inode_data
);
2183 LIST_REMOVE(inotify
.by_inode_data
, new_inode_data
->event_sources
, s
);
2184 LIST_PREPEND(inotify
.by_inode_data
, old_inode_data
->event_sources
, s
);
2185 s
->inotify
.inode_data
= old_inode_data
;
2189 s
->priority
= priority
;
2191 event_gc_inode_data(s
->event
, old_inode_data
);
2193 } else if (s
->type
== SOURCE_SIGNAL
&& s
->enabled
!= SD_EVENT_OFF
) {
2194 struct signal_data
*old
, *d
;
2196 /* Move us from the signalfd belonging to the old
2197 * priority to the signalfd of the new priority */
2199 assert_se(old
= hashmap_get(s
->event
->signal_data
, &s
->priority
));
2201 s
->priority
= priority
;
2203 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, &d
);
2205 s
->priority
= old
->priority
;
2209 event_unmask_signal_data(s
->event
, old
, s
->signal
.sig
);
2211 s
->priority
= priority
;
2214 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
2217 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
2219 if (s
->type
== SOURCE_EXIT
)
2220 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
2226 event_free_inode_data(s
->event
, new_inode_data
);
2229 event_free_inotify_data(s
->event
, new_inotify_data
);
2234 _public_
int sd_event_source_get_enabled(sd_event_source
*s
, int *m
) {
2235 assert_return(s
, -EINVAL
);
2236 assert_return(m
, -EINVAL
);
2237 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2243 _public_
int sd_event_source_set_enabled(sd_event_source
*s
, int m
) {
2246 assert_return(s
, -EINVAL
);
2247 assert_return(IN_SET(m
, SD_EVENT_OFF
, SD_EVENT_ON
, SD_EVENT_ONESHOT
), -EINVAL
);
2248 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2250 /* If we are dead anyway, we are fine with turning off
2251 * sources, but everything else needs to fail. */
2252 if (s
->event
->state
== SD_EVENT_FINISHED
)
2253 return m
== SD_EVENT_OFF
? 0 : -ESTALE
;
2255 if (s
->enabled
== m
)
2258 if (m
== SD_EVENT_OFF
) {
2260 /* Unset the pending flag when this event source is disabled */
2261 if (!IN_SET(s
->type
, SOURCE_DEFER
, SOURCE_EXIT
)) {
2262 r
= source_set_pending(s
, false);
2270 source_io_unregister(s
);
2274 case SOURCE_TIME_REALTIME
:
2275 case SOURCE_TIME_BOOTTIME
:
2276 case SOURCE_TIME_MONOTONIC
:
2277 case SOURCE_TIME_REALTIME_ALARM
:
2278 case SOURCE_TIME_BOOTTIME_ALARM
: {
2279 struct clock_data
*d
;
2282 d
= event_get_clock_data(s
->event
, s
->type
);
2285 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
2286 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2287 d
->needs_rearm
= true;
2294 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
2300 assert(s
->event
->n_enabled_child_sources
> 0);
2301 s
->event
->n_enabled_child_sources
--;
2303 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
2308 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
2313 case SOURCE_INOTIFY
:
2318 assert_not_reached("Wut? I shouldn't exist.");
2323 /* Unset the pending flag when this event source is enabled */
2324 if (s
->enabled
== SD_EVENT_OFF
&& !IN_SET(s
->type
, SOURCE_DEFER
, SOURCE_EXIT
)) {
2325 r
= source_set_pending(s
, false);
2333 r
= source_io_register(s
, m
, s
->io
.events
);
2340 case SOURCE_TIME_REALTIME
:
2341 case SOURCE_TIME_BOOTTIME
:
2342 case SOURCE_TIME_MONOTONIC
:
2343 case SOURCE_TIME_REALTIME_ALARM
:
2344 case SOURCE_TIME_BOOTTIME_ALARM
: {
2345 struct clock_data
*d
;
2348 d
= event_get_clock_data(s
->event
, s
->type
);
2351 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
2352 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2353 d
->needs_rearm
= true;
2361 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, NULL
);
2363 s
->enabled
= SD_EVENT_OFF
;
2364 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
2372 if (s
->enabled
== SD_EVENT_OFF
)
2373 s
->event
->n_enabled_child_sources
++;
2377 r
= event_make_signal_data(s
->event
, SIGCHLD
, NULL
);
2379 s
->enabled
= SD_EVENT_OFF
;
2380 s
->event
->n_enabled_child_sources
--;
2381 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
2389 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
2394 case SOURCE_INOTIFY
:
2399 assert_not_reached("Wut? I shouldn't exist.");
2404 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
2407 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
2412 _public_
int sd_event_source_get_time(sd_event_source
*s
, uint64_t *usec
) {
2413 assert_return(s
, -EINVAL
);
2414 assert_return(usec
, -EINVAL
);
2415 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
2416 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2418 *usec
= s
->time
.next
;
2422 _public_
int sd_event_source_set_time(sd_event_source
*s
, uint64_t usec
) {
2423 struct clock_data
*d
;
2426 assert_return(s
, -EINVAL
);
2427 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
2428 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2429 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2431 r
= source_set_pending(s
, false);
2435 s
->time
.next
= usec
;
2437 d
= event_get_clock_data(s
->event
, s
->type
);
2440 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
2441 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2442 d
->needs_rearm
= true;
2447 _public_
int sd_event_source_get_time_accuracy(sd_event_source
*s
, uint64_t *usec
) {
2448 assert_return(s
, -EINVAL
);
2449 assert_return(usec
, -EINVAL
);
2450 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
2451 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2453 *usec
= s
->time
.accuracy
;
2457 _public_
int sd_event_source_set_time_accuracy(sd_event_source
*s
, uint64_t usec
) {
2458 struct clock_data
*d
;
2461 assert_return(s
, -EINVAL
);
2462 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
2463 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
2464 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2465 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2467 r
= source_set_pending(s
, false);
2472 usec
= DEFAULT_ACCURACY_USEC
;
2474 s
->time
.accuracy
= usec
;
2476 d
= event_get_clock_data(s
->event
, s
->type
);
2479 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2480 d
->needs_rearm
= true;
2485 _public_
int sd_event_source_get_time_clock(sd_event_source
*s
, clockid_t
*clock
) {
2486 assert_return(s
, -EINVAL
);
2487 assert_return(clock
, -EINVAL
);
2488 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
2489 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2491 *clock
= event_source_type_to_clock(s
->type
);
2495 _public_
int sd_event_source_get_child_pid(sd_event_source
*s
, pid_t
*pid
) {
2496 assert_return(s
, -EINVAL
);
2497 assert_return(pid
, -EINVAL
);
2498 assert_return(s
->type
== SOURCE_CHILD
, -EDOM
);
2499 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2501 *pid
= s
->child
.pid
;
2505 _public_
int sd_event_source_get_inotify_mask(sd_event_source
*s
, uint32_t *mask
) {
2506 assert_return(s
, -EINVAL
);
2507 assert_return(mask
, -EINVAL
);
2508 assert_return(s
->type
== SOURCE_INOTIFY
, -EDOM
);
2509 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2511 *mask
= s
->inotify
.mask
;
2515 _public_
int sd_event_source_set_prepare(sd_event_source
*s
, sd_event_handler_t callback
) {
2518 assert_return(s
, -EINVAL
);
2519 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
2520 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2521 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
2523 if (s
->prepare
== callback
)
2526 if (callback
&& s
->prepare
) {
2527 s
->prepare
= callback
;
2531 r
= prioq_ensure_allocated(&s
->event
->prepare
, prepare_prioq_compare
);
2535 s
->prepare
= callback
;
2538 r
= prioq_put(s
->event
->prepare
, s
, &s
->prepare_index
);
2542 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
2547 _public_
void* sd_event_source_get_userdata(sd_event_source
*s
) {
2548 assert_return(s
, NULL
);
2553 _public_
void *sd_event_source_set_userdata(sd_event_source
*s
, void *userdata
) {
2556 assert_return(s
, NULL
);
2559 s
->userdata
= userdata
;
2564 static usec_t
sleep_between(sd_event
*e
, usec_t a
, usec_t b
) {
2571 if (a
>= USEC_INFINITY
)
2572 return USEC_INFINITY
;
2577 initialize_perturb(e
);
2580 Find a good time to wake up again between times a and b. We
2581 have two goals here:
2583 a) We want to wake up as seldom as possible, hence prefer
2584 later times over earlier times.
2586 b) But if we have to wake up, then let's make sure to
2587 dispatch as much as possible on the entire system.
2589 We implement this by waking up everywhere at the same time
2590 within any given minute if we can, synchronised via the
2591 perturbation value determined from the boot ID. If we can't,
2592 then we try to find the same spot in every 10s, then 1s and
2593 then 250ms step. Otherwise, we pick the last possible time
2597 c
= (b
/ USEC_PER_MINUTE
) * USEC_PER_MINUTE
+ e
->perturb
;
2599 if (_unlikely_(c
< USEC_PER_MINUTE
))
2602 c
-= USEC_PER_MINUTE
;
2608 c
= (b
/ (USEC_PER_SEC
*10)) * (USEC_PER_SEC
*10) + (e
->perturb
% (USEC_PER_SEC
*10));
2610 if (_unlikely_(c
< USEC_PER_SEC
*10))
2613 c
-= USEC_PER_SEC
*10;
2619 c
= (b
/ USEC_PER_SEC
) * USEC_PER_SEC
+ (e
->perturb
% USEC_PER_SEC
);
2621 if (_unlikely_(c
< USEC_PER_SEC
))
2630 c
= (b
/ (USEC_PER_MSEC
*250)) * (USEC_PER_MSEC
*250) + (e
->perturb
% (USEC_PER_MSEC
*250));
2632 if (_unlikely_(c
< USEC_PER_MSEC
*250))
2635 c
-= USEC_PER_MSEC
*250;
2644 static int event_arm_timer(
2646 struct clock_data
*d
) {
2648 struct itimerspec its
= {};
2649 sd_event_source
*a
, *b
;
2656 if (!d
->needs_rearm
)
2659 d
->needs_rearm
= false;
2661 a
= prioq_peek(d
->earliest
);
2662 if (!a
|| a
->enabled
== SD_EVENT_OFF
|| a
->time
.next
== USEC_INFINITY
) {
2667 if (d
->next
== USEC_INFINITY
)
2671 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2675 d
->next
= USEC_INFINITY
;
2679 b
= prioq_peek(d
->latest
);
2680 assert_se(b
&& b
->enabled
!= SD_EVENT_OFF
);
2682 t
= sleep_between(e
, a
->time
.next
, time_event_source_latest(b
));
2686 assert_se(d
->fd
>= 0);
2689 /* We don' want to disarm here, just mean some time looooong ago. */
2690 its
.it_value
.tv_sec
= 0;
2691 its
.it_value
.tv_nsec
= 1;
2693 timespec_store(&its
.it_value
, t
);
2695 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2703 static int process_io(sd_event
*e
, sd_event_source
*s
, uint32_t revents
) {
2706 assert(s
->type
== SOURCE_IO
);
2708 /* If the event source was already pending, we just OR in the
2709 * new revents, otherwise we reset the value. The ORing is
2710 * necessary to handle EPOLLONESHOT events properly where
2711 * readability might happen independently of writability, and
2712 * we need to keep track of both */
2715 s
->io
.revents
|= revents
;
2717 s
->io
.revents
= revents
;
2719 return source_set_pending(s
, true);
2722 static int flush_timer(sd_event
*e
, int fd
, uint32_t events
, usec_t
*next
) {
2729 assert_return(events
== EPOLLIN
, -EIO
);
2731 ss
= read(fd
, &x
, sizeof(x
));
2733 if (IN_SET(errno
, EAGAIN
, EINTR
))
2739 if (_unlikely_(ss
!= sizeof(x
)))
2743 *next
= USEC_INFINITY
;
2748 static int process_timer(
2751 struct clock_data
*d
) {
2760 s
= prioq_peek(d
->earliest
);
2763 s
->enabled
== SD_EVENT_OFF
||
2767 r
= source_set_pending(s
, true);
2771 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
2772 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2773 d
->needs_rearm
= true;
2779 static int process_child(sd_event
*e
) {
2786 e
->need_process_child
= false;
2789 So, this is ugly. We iteratively invoke waitid() with P_PID
2790 + WNOHANG for each PID we wait for, instead of using
2791 P_ALL. This is because we only want to get child
2792 information of very specific child processes, and not all
2793 of them. We might not have processed the SIGCHLD event of a
2794 previous invocation and we don't want to maintain an
2795 unbounded *per-child* event queue, hence we really don't
2796 want anything flushed out of the kernel's queue that we
2797 don't care about. Since this is O(n) this means that if you
2798 have a lot of processes you probably want to handle SIGCHLD
2801 We do not reap the children here (by using WNOWAIT), this
2802 is only done after the event source is dispatched so that
2803 the callback still sees the process as a zombie.
2806 HASHMAP_FOREACH(s
, e
->child_sources
, i
) {
2807 assert(s
->type
== SOURCE_CHILD
);
2812 if (s
->enabled
== SD_EVENT_OFF
)
2815 zero(s
->child
.siginfo
);
2816 r
= waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
,
2817 WNOHANG
| (s
->child
.options
& WEXITED
? WNOWAIT
: 0) | s
->child
.options
);
2821 if (s
->child
.siginfo
.si_pid
!= 0) {
2822 bool zombie
= IN_SET(s
->child
.siginfo
.si_code
, CLD_EXITED
, CLD_KILLED
, CLD_DUMPED
);
2824 if (!zombie
&& (s
->child
.options
& WEXITED
)) {
2825 /* If the child isn't dead then let's
2826 * immediately remove the state change
2827 * from the queue, since there's no
2828 * benefit in leaving it queued */
2830 assert(s
->child
.options
& (WSTOPPED
|WCONTINUED
));
2831 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|(s
->child
.options
& (WSTOPPED
|WCONTINUED
)));
2834 r
= source_set_pending(s
, true);
2843 static int process_signal(sd_event
*e
, struct signal_data
*d
, uint32_t events
) {
2844 bool read_one
= false;
2849 assert_return(events
== EPOLLIN
, -EIO
);
2851 /* If there's a signal queued on this priority and SIGCHLD is
2852 on this priority too, then make sure to recheck the
2853 children we watch. This is because we only ever dequeue
2854 the first signal per priority, and if we dequeue one, and
2855 SIGCHLD might be enqueued later we wouldn't know, but we
2856 might have higher priority children we care about hence we
2857 need to check that explicitly. */
2859 if (sigismember(&d
->sigset
, SIGCHLD
))
2860 e
->need_process_child
= true;
2862 /* If there's already an event source pending for this
2863 * priority we don't read another */
2868 struct signalfd_siginfo si
;
2870 sd_event_source
*s
= NULL
;
2872 n
= read(d
->fd
, &si
, sizeof(si
));
2874 if (IN_SET(errno
, EAGAIN
, EINTR
))
2880 if (_unlikely_(n
!= sizeof(si
)))
2883 assert(SIGNAL_VALID(si
.ssi_signo
));
2887 if (e
->signal_sources
)
2888 s
= e
->signal_sources
[si
.ssi_signo
];
2894 s
->signal
.siginfo
= si
;
2897 r
= source_set_pending(s
, true);
2905 static int event_inotify_data_read(sd_event
*e
, struct inotify_data
*d
, uint32_t revents
) {
2911 assert_return(revents
== EPOLLIN
, -EIO
);
2913 /* If there's already an event source pending for this priority, don't read another */
2914 if (d
->n_pending
> 0)
2917 /* Is the read buffer non-empty? If so, let's not read more */
2918 if (d
->buffer_filled
> 0)
2921 n
= read(d
->fd
, &d
->buffer
, sizeof(d
->buffer
));
2923 if (IN_SET(errno
, EAGAIN
, EINTR
))
2930 d
->buffer_filled
= (size_t) n
;
2931 LIST_PREPEND(buffered
, e
->inotify_data_buffered
, d
);
2936 static void event_inotify_data_drop(sd_event
*e
, struct inotify_data
*d
, size_t sz
) {
2939 assert(sz
<= d
->buffer_filled
);
2944 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2945 memmove(d
->buffer
.raw
, d
->buffer
.raw
+ sz
, d
->buffer_filled
- sz
);
2946 d
->buffer_filled
-= sz
;
2948 if (d
->buffer_filled
== 0)
2949 LIST_REMOVE(buffered
, e
->inotify_data_buffered
, d
);
2952 static int event_inotify_data_process(sd_event
*e
, struct inotify_data
*d
) {
2958 /* If there's already an event source pending for this priority, don't read another */
2959 if (d
->n_pending
> 0)
2962 while (d
->buffer_filled
> 0) {
2965 /* Let's validate that the event structures are complete */
2966 if (d
->buffer_filled
< offsetof(struct inotify_event
, name
))
2969 sz
= offsetof(struct inotify_event
, name
) + d
->buffer
.ev
.len
;
2970 if (d
->buffer_filled
< sz
)
2973 if (d
->buffer
.ev
.mask
& IN_Q_OVERFLOW
) {
2974 struct inode_data
*inode_data
;
2977 /* The queue overran, let's pass this event to all event sources connected to this inotify
2980 HASHMAP_FOREACH(inode_data
, d
->inodes
, i
) {
2983 LIST_FOREACH(inotify
.by_inode_data
, s
, inode_data
->event_sources
) {
2985 if (s
->enabled
== SD_EVENT_OFF
)
2988 r
= source_set_pending(s
, true);
2994 struct inode_data
*inode_data
;
2997 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2998 * our watch descriptor table. */
2999 if (d
->buffer
.ev
.mask
& IN_IGNORED
) {
3001 inode_data
= hashmap_remove(d
->wd
, INT_TO_PTR(d
->buffer
.ev
.wd
));
3003 event_inotify_data_drop(e
, d
, sz
);
3007 /* The watch descriptor was removed by the kernel, let's drop it here too */
3008 inode_data
->wd
= -1;
3010 inode_data
= hashmap_get(d
->wd
, INT_TO_PTR(d
->buffer
.ev
.wd
));
3012 event_inotify_data_drop(e
, d
, sz
);
3017 /* Trigger all event sources that are interested in these events. Also trigger all event
3018 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3019 LIST_FOREACH(inotify
.by_inode_data
, s
, inode_data
->event_sources
) {
3021 if (s
->enabled
== SD_EVENT_OFF
)
3024 if ((d
->buffer
.ev
.mask
& (IN_IGNORED
|IN_UNMOUNT
)) == 0 &&
3025 (s
->inotify
.mask
& d
->buffer
.ev
.mask
& IN_ALL_EVENTS
) == 0)
3028 r
= source_set_pending(s
, true);
3034 /* Something pending now? If so, let's finish, otherwise let's read more. */
3035 if (d
->n_pending
> 0)
3042 static int process_inotify(sd_event
*e
) {
3043 struct inotify_data
*d
;
3048 LIST_FOREACH(buffered
, d
, e
->inotify_data_buffered
) {
3049 r
= event_inotify_data_process(e
, d
);
3059 static int source_dispatch(sd_event_source
*s
) {
3060 EventSourceType saved_type
;
3064 assert(s
->pending
|| s
->type
== SOURCE_EXIT
);
3066 /* Save the event source type, here, so that we still know it after the event callback which might invalidate
3068 saved_type
= s
->type
;
3070 if (!IN_SET(s
->type
, SOURCE_DEFER
, SOURCE_EXIT
)) {
3071 r
= source_set_pending(s
, false);
3076 if (s
->type
!= SOURCE_POST
) {
3080 /* If we execute a non-post source, let's mark all
3081 * post sources as pending */
3083 SET_FOREACH(z
, s
->event
->post_sources
, i
) {
3084 if (z
->enabled
== SD_EVENT_OFF
)
3087 r
= source_set_pending(z
, true);
3093 if (s
->enabled
== SD_EVENT_ONESHOT
) {
3094 r
= sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
3099 s
->dispatching
= true;
3104 r
= s
->io
.callback(s
, s
->io
.fd
, s
->io
.revents
, s
->userdata
);
3107 case SOURCE_TIME_REALTIME
:
3108 case SOURCE_TIME_BOOTTIME
:
3109 case SOURCE_TIME_MONOTONIC
:
3110 case SOURCE_TIME_REALTIME_ALARM
:
3111 case SOURCE_TIME_BOOTTIME_ALARM
:
3112 r
= s
->time
.callback(s
, s
->time
.next
, s
->userdata
);
3116 r
= s
->signal
.callback(s
, &s
->signal
.siginfo
, s
->userdata
);
3119 case SOURCE_CHILD
: {
3122 zombie
= IN_SET(s
->child
.siginfo
.si_code
, CLD_EXITED
, CLD_KILLED
, CLD_DUMPED
);
3124 r
= s
->child
.callback(s
, &s
->child
.siginfo
, s
->userdata
);
3126 /* Now, reap the PID for good. */
3128 (void) waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|WEXITED
);
3134 r
= s
->defer
.callback(s
, s
->userdata
);
3138 r
= s
->post
.callback(s
, s
->userdata
);
3142 r
= s
->exit
.callback(s
, s
->userdata
);
3145 case SOURCE_INOTIFY
: {
3146 struct sd_event
*e
= s
->event
;
3147 struct inotify_data
*d
;
3150 assert(s
->inotify
.inode_data
);
3151 assert_se(d
= s
->inotify
.inode_data
->inotify_data
);
3153 assert(d
->buffer_filled
>= offsetof(struct inotify_event
, name
));
3154 sz
= offsetof(struct inotify_event
, name
) + d
->buffer
.ev
.len
;
3155 assert(d
->buffer_filled
>= sz
);
3157 r
= s
->inotify
.callback(s
, &d
->buffer
.ev
, s
->userdata
);
3159 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3161 if (d
->n_pending
== 0)
3162 event_inotify_data_drop(e
, d
, sz
);
3167 case SOURCE_WATCHDOG
:
3168 case _SOURCE_EVENT_SOURCE_TYPE_MAX
:
3169 case _SOURCE_EVENT_SOURCE_TYPE_INVALID
:
3170 assert_not_reached("Wut? I shouldn't exist.");
3173 s
->dispatching
= false;
3176 log_debug_errno(r
, "Event source %s (type %s) returned error, disabling: %m",
3177 strna(s
->description
), event_source_type_to_string(saved_type
));
3182 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
3187 static int event_prepare(sd_event
*e
) {
3195 s
= prioq_peek(e
->prepare
);
3196 if (!s
|| s
->prepare_iteration
== e
->iteration
|| s
->enabled
== SD_EVENT_OFF
)
3199 s
->prepare_iteration
= e
->iteration
;
3200 r
= prioq_reshuffle(e
->prepare
, s
, &s
->prepare_index
);
3206 s
->dispatching
= true;
3207 r
= s
->prepare(s
, s
->userdata
);
3208 s
->dispatching
= false;
3211 log_debug_errno(r
, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3212 strna(s
->description
), event_source_type_to_string(s
->type
));
3217 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
3223 static int dispatch_exit(sd_event
*e
) {
3225 _cleanup_(sd_event_unrefp
) sd_event
*ref
= NULL
;
3230 p
= prioq_peek(e
->exit
);
3231 if (!p
|| p
->enabled
== SD_EVENT_OFF
) {
3232 e
->state
= SD_EVENT_FINISHED
;
3236 ref
= sd_event_ref(e
);
3238 e
->state
= SD_EVENT_EXITING
;
3239 r
= source_dispatch(p
);
3240 e
->state
= SD_EVENT_INITIAL
;
3244 static sd_event_source
* event_next_pending(sd_event
*e
) {
3249 p
= prioq_peek(e
->pending
);
3253 if (p
->enabled
== SD_EVENT_OFF
)
3259 static int arm_watchdog(sd_event
*e
) {
3260 struct itimerspec its
= {};
3265 assert(e
->watchdog_fd
>= 0);
3267 t
= sleep_between(e
,
3268 e
->watchdog_last
+ (e
->watchdog_period
/ 2),
3269 e
->watchdog_last
+ (e
->watchdog_period
* 3 / 4));
3271 timespec_store(&its
.it_value
, t
);
3273 /* Make sure we never set the watchdog to 0, which tells the
3274 * kernel to disable it. */
3275 if (its
.it_value
.tv_sec
== 0 && its
.it_value
.tv_nsec
== 0)
3276 its
.it_value
.tv_nsec
= 1;
3278 r
= timerfd_settime(e
->watchdog_fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
3285 static int process_watchdog(sd_event
*e
) {
3291 /* Don't notify watchdog too often */
3292 if (e
->watchdog_last
+ e
->watchdog_period
/ 4 > e
->timestamp
.monotonic
)
3295 sd_notify(false, "WATCHDOG=1");
3296 e
->watchdog_last
= e
->timestamp
.monotonic
;
3298 return arm_watchdog(e
);
3301 static void event_close_inode_data_fds(sd_event
*e
) {
3302 struct inode_data
*d
;
3306 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3307 * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
3308 * adjustments to the even source, such as changing the priority (which requires us to remove and readd a watch
3309 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3312 while ((d
= e
->inode_data_to_close
)) {
3314 d
->fd
= safe_close(d
->fd
);
3316 LIST_REMOVE(to_close
, e
->inode_data_to_close
, d
);
3320 _public_
int sd_event_prepare(sd_event
*e
) {
3323 assert_return(e
, -EINVAL
);
3324 assert_return(e
= event_resolve(e
), -ENOPKG
);
3325 assert_return(!event_pid_changed(e
), -ECHILD
);
3326 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
3327 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
3329 if (e
->exit_requested
)
3334 e
->state
= SD_EVENT_PREPARING
;
3335 r
= event_prepare(e
);
3336 e
->state
= SD_EVENT_INITIAL
;
3340 r
= event_arm_timer(e
, &e
->realtime
);
3344 r
= event_arm_timer(e
, &e
->boottime
);
3348 r
= event_arm_timer(e
, &e
->monotonic
);
3352 r
= event_arm_timer(e
, &e
->realtime_alarm
);
3356 r
= event_arm_timer(e
, &e
->boottime_alarm
);
3360 event_close_inode_data_fds(e
);
3362 if (event_next_pending(e
) || e
->need_process_child
)
3365 e
->state
= SD_EVENT_ARMED
;
3370 e
->state
= SD_EVENT_ARMED
;
3371 r
= sd_event_wait(e
, 0);
3373 e
->state
= SD_EVENT_ARMED
;
3378 _public_
int sd_event_wait(sd_event
*e
, uint64_t timeout
) {
3379 struct epoll_event
*ev_queue
;
3380 unsigned ev_queue_max
;
3383 assert_return(e
, -EINVAL
);
3384 assert_return(e
= event_resolve(e
), -ENOPKG
);
3385 assert_return(!event_pid_changed(e
), -ECHILD
);
3386 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
3387 assert_return(e
->state
== SD_EVENT_ARMED
, -EBUSY
);
3389 if (e
->exit_requested
) {
3390 e
->state
= SD_EVENT_PENDING
;
3394 ev_queue_max
= MAX(e
->n_sources
, 1u);
3395 ev_queue
= newa(struct epoll_event
, ev_queue_max
);
3397 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3398 if (e
->inotify_data_buffered
)
3401 m
= epoll_wait(e
->epoll_fd
, ev_queue
, ev_queue_max
,
3402 timeout
== (uint64_t) -1 ? -1 : (int) ((timeout
+ USEC_PER_MSEC
- 1) / USEC_PER_MSEC
));
3404 if (errno
== EINTR
) {
3405 e
->state
= SD_EVENT_PENDING
;
3413 triple_timestamp_get(&e
->timestamp
);
3415 for (i
= 0; i
< m
; i
++) {
3417 if (ev_queue
[i
].data
.ptr
== INT_TO_PTR(SOURCE_WATCHDOG
))
3418 r
= flush_timer(e
, e
->watchdog_fd
, ev_queue
[i
].events
, NULL
);
3420 WakeupType
*t
= ev_queue
[i
].data
.ptr
;
3424 case WAKEUP_EVENT_SOURCE
:
3425 r
= process_io(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
3428 case WAKEUP_CLOCK_DATA
: {
3429 struct clock_data
*d
= ev_queue
[i
].data
.ptr
;
3430 r
= flush_timer(e
, d
->fd
, ev_queue
[i
].events
, &d
->next
);
3434 case WAKEUP_SIGNAL_DATA
:
3435 r
= process_signal(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
3438 case WAKEUP_INOTIFY_DATA
:
3439 r
= event_inotify_data_read(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
3443 assert_not_reached("Invalid wake-up pointer");
3450 r
= process_watchdog(e
);
3454 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime
);
3458 r
= process_timer(e
, e
->timestamp
.boottime
, &e
->boottime
);
3462 r
= process_timer(e
, e
->timestamp
.monotonic
, &e
->monotonic
);
3466 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime_alarm
);
3470 r
= process_timer(e
, e
->timestamp
.boottime
, &e
->boottime_alarm
);
3474 if (e
->need_process_child
) {
3475 r
= process_child(e
);
3480 r
= process_inotify(e
);
3484 if (event_next_pending(e
)) {
3485 e
->state
= SD_EVENT_PENDING
;
3493 e
->state
= SD_EVENT_INITIAL
;
3498 _public_
int sd_event_dispatch(sd_event
*e
) {
3502 assert_return(e
, -EINVAL
);
3503 assert_return(e
= event_resolve(e
), -ENOPKG
);
3504 assert_return(!event_pid_changed(e
), -ECHILD
);
3505 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
3506 assert_return(e
->state
== SD_EVENT_PENDING
, -EBUSY
);
3508 if (e
->exit_requested
)
3509 return dispatch_exit(e
);
3511 p
= event_next_pending(e
);
3513 _cleanup_(sd_event_unrefp
) sd_event
*ref
= NULL
;
3515 ref
= sd_event_ref(e
);
3516 e
->state
= SD_EVENT_RUNNING
;
3517 r
= source_dispatch(p
);
3518 e
->state
= SD_EVENT_INITIAL
;
3522 e
->state
= SD_EVENT_INITIAL
;
3527 static void event_log_delays(sd_event
*e
) {
3528 char b
[ELEMENTSOF(e
->delays
) * DECIMAL_STR_MAX(unsigned) + 1];
3532 for (i
= o
= 0; i
< ELEMENTSOF(e
->delays
); i
++) {
3533 o
+= snprintf(&b
[o
], sizeof(b
) - o
, "%u ", e
->delays
[i
]);
3536 log_debug("Event loop iterations: %.*s", o
, b
);
3539 _public_
int sd_event_run(sd_event
*e
, uint64_t timeout
) {
3542 assert_return(e
, -EINVAL
);
3543 assert_return(e
= event_resolve(e
), -ENOPKG
);
3544 assert_return(!event_pid_changed(e
), -ECHILD
);
3545 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
3546 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
3548 if (e
->profile_delays
&& e
->last_run
) {
3552 this_run
= now(CLOCK_MONOTONIC
);
3554 l
= u64log2(this_run
- e
->last_run
);
3555 assert(l
< sizeof(e
->delays
));
3558 if (this_run
- e
->last_log
>= 5*USEC_PER_SEC
) {
3559 event_log_delays(e
);
3560 e
->last_log
= this_run
;
3564 r
= sd_event_prepare(e
);
3566 /* There was nothing? Then wait... */
3567 r
= sd_event_wait(e
, timeout
);
3569 if (e
->profile_delays
)
3570 e
->last_run
= now(CLOCK_MONOTONIC
);
3573 /* There's something now, then let's dispatch it */
3574 r
= sd_event_dispatch(e
);
3584 _public_
int sd_event_loop(sd_event
*e
) {
3585 _cleanup_(sd_event_unrefp
) sd_event
*ref
= NULL
;
3588 assert_return(e
, -EINVAL
);
3589 assert_return(e
= event_resolve(e
), -ENOPKG
);
3590 assert_return(!event_pid_changed(e
), -ECHILD
);
3591 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
3593 ref
= sd_event_ref(e
);
3595 while (e
->state
!= SD_EVENT_FINISHED
) {
3596 r
= sd_event_run(e
, (uint64_t) -1);
3601 return e
->exit_code
;
3604 _public_
int sd_event_get_fd(sd_event
*e
) {
3606 assert_return(e
, -EINVAL
);
3607 assert_return(e
= event_resolve(e
), -ENOPKG
);
3608 assert_return(!event_pid_changed(e
), -ECHILD
);
3613 _public_
int sd_event_get_state(sd_event
*e
) {
3614 assert_return(e
, -EINVAL
);
3615 assert_return(e
= event_resolve(e
), -ENOPKG
);
3616 assert_return(!event_pid_changed(e
), -ECHILD
);
3621 _public_
int sd_event_get_exit_code(sd_event
*e
, int *code
) {
3622 assert_return(e
, -EINVAL
);
3623 assert_return(e
= event_resolve(e
), -ENOPKG
);
3624 assert_return(code
, -EINVAL
);
3625 assert_return(!event_pid_changed(e
), -ECHILD
);
3627 if (!e
->exit_requested
)
3630 *code
= e
->exit_code
;
3634 _public_
int sd_event_exit(sd_event
*e
, int code
) {
3635 assert_return(e
, -EINVAL
);
3636 assert_return(e
= event_resolve(e
), -ENOPKG
);
3637 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
3638 assert_return(!event_pid_changed(e
), -ECHILD
);
3640 e
->exit_requested
= true;
3641 e
->exit_code
= code
;
3646 _public_
int sd_event_now(sd_event
*e
, clockid_t clock
, uint64_t *usec
) {
3647 assert_return(e
, -EINVAL
);
3648 assert_return(e
= event_resolve(e
), -ENOPKG
);
3649 assert_return(usec
, -EINVAL
);
3650 assert_return(!event_pid_changed(e
), -ECHILD
);
3652 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock
))
3655 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
3656 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3657 * the purpose of getting the time this doesn't matter. */
3658 if (IN_SET(clock
, CLOCK_BOOTTIME
, CLOCK_BOOTTIME_ALARM
) && !clock_boottime_supported())
3661 if (!triple_timestamp_is_set(&e
->timestamp
)) {
3662 /* Implicitly fall back to now() if we never ran
3663 * before and thus have no cached time. */
3668 *usec
= triple_timestamp_by_clock(&e
->timestamp
, clock
);
3672 _public_
int sd_event_default(sd_event
**ret
) {
3677 return !!default_event
;
3679 if (default_event
) {
3680 *ret
= sd_event_ref(default_event
);
3684 r
= sd_event_new(&e
);
3688 e
->default_event_ptr
= &default_event
;
3696 _public_
int sd_event_get_tid(sd_event
*e
, pid_t
*tid
) {
3697 assert_return(e
, -EINVAL
);
3698 assert_return(e
= event_resolve(e
), -ENOPKG
);
3699 assert_return(tid
, -EINVAL
);
3700 assert_return(!event_pid_changed(e
), -ECHILD
);
3710 _public_
int sd_event_set_watchdog(sd_event
*e
, int b
) {
3713 assert_return(e
, -EINVAL
);
3714 assert_return(e
= event_resolve(e
), -ENOPKG
);
3715 assert_return(!event_pid_changed(e
), -ECHILD
);
3717 if (e
->watchdog
== !!b
)
3721 struct epoll_event ev
;
3723 r
= sd_watchdog_enabled(false, &e
->watchdog_period
);
3727 /* Issue first ping immediately */
3728 sd_notify(false, "WATCHDOG=1");
3729 e
->watchdog_last
= now(CLOCK_MONOTONIC
);
3731 e
->watchdog_fd
= timerfd_create(CLOCK_MONOTONIC
, TFD_NONBLOCK
|TFD_CLOEXEC
);
3732 if (e
->watchdog_fd
< 0)
3735 r
= arm_watchdog(e
);
3739 ev
= (struct epoll_event
) {
3741 .data
.ptr
= INT_TO_PTR(SOURCE_WATCHDOG
),
3744 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, e
->watchdog_fd
, &ev
);
3751 if (e
->watchdog_fd
>= 0) {
3752 epoll_ctl(e
->epoll_fd
, EPOLL_CTL_DEL
, e
->watchdog_fd
, NULL
);
3753 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
3761 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
3765 _public_
int sd_event_get_watchdog(sd_event
*e
) {
3766 assert_return(e
, -EINVAL
);
3767 assert_return(e
= event_resolve(e
), -ENOPKG
);
3768 assert_return(!event_pid_changed(e
), -ECHILD
);
3773 _public_
int sd_event_get_iteration(sd_event
*e
, uint64_t *ret
) {
3774 assert_return(e
, -EINVAL
);
3775 assert_return(e
= event_resolve(e
), -ENOPKG
);
3776 assert_return(!event_pid_changed(e
), -ECHILD
);
3778 *ret
= e
->iteration
;
3782 _public_
int sd_event_source_set_destroy_callback(sd_event_source
*s
, sd_event_destroy_t callback
) {
3783 assert_return(s
, -EINVAL
);
3785 s
->destroy_callback
= callback
;
3789 _public_
int sd_event_source_get_destroy_callback(sd_event_source
*s
, sd_event_destroy_t
*ret
) {
3790 assert_return(s
, -EINVAL
);
3793 *ret
= s
->destroy_callback
;
3795 return !!s
->destroy_callback
;