1 /* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21 #include <sys/epoll.h>
22 #include <sys/timerfd.h>
25 #include "sd-daemon.h"
29 #include "alloc-util.h"
36 #include "process-util.h"
38 #include "signal-util.h"
39 #include "string-table.h"
40 #include "string-util.h"
41 #include "time-util.h"
44 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
46 typedef enum EventSourceType
{
50 SOURCE_TIME_MONOTONIC
,
51 SOURCE_TIME_REALTIME_ALARM
,
52 SOURCE_TIME_BOOTTIME_ALARM
,
59 _SOURCE_EVENT_SOURCE_TYPE_MAX
,
60 _SOURCE_EVENT_SOURCE_TYPE_INVALID
= -1
63 static const char* const event_source_type_table
[_SOURCE_EVENT_SOURCE_TYPE_MAX
] = {
65 [SOURCE_TIME_REALTIME
] = "realtime",
66 [SOURCE_TIME_BOOTTIME
] = "bootime",
67 [SOURCE_TIME_MONOTONIC
] = "monotonic",
68 [SOURCE_TIME_REALTIME_ALARM
] = "realtime-alarm",
69 [SOURCE_TIME_BOOTTIME_ALARM
] = "boottime-alarm",
70 [SOURCE_SIGNAL
] = "signal",
71 [SOURCE_CHILD
] = "child",
72 [SOURCE_DEFER
] = "defer",
73 [SOURCE_POST
] = "post",
74 [SOURCE_EXIT
] = "exit",
75 [SOURCE_WATCHDOG
] = "watchdog",
78 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type
, int);
80 /* All objects we use in epoll events start with this value, so that
81 * we know how to dispatch it */
82 typedef enum WakeupType
{
88 _WAKEUP_TYPE_INVALID
= -1,
91 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
93 struct sd_event_source
{
100 sd_event_handler_t prepare
;
104 EventSourceType type
:5;
111 unsigned pending_index
;
112 unsigned prepare_index
;
113 uint64_t pending_iteration
;
114 uint64_t prepare_iteration
;
116 LIST_FIELDS(sd_event_source
, sources
);
120 sd_event_io_handler_t callback
;
127 sd_event_time_handler_t callback
;
128 usec_t next
, accuracy
;
129 unsigned earliest_index
;
130 unsigned latest_index
;
133 sd_event_signal_handler_t callback
;
134 struct signalfd_siginfo siginfo
;
138 sd_event_child_handler_t callback
;
144 sd_event_handler_t callback
;
147 sd_event_handler_t callback
;
150 sd_event_handler_t callback
;
151 unsigned prioq_index
;
160 /* For all clocks we maintain two priority queues each, one
161 * ordered for the earliest times the events may be
162 * dispatched, and one ordered by the latest times they must
163 * have been dispatched. The range between the top entries in
164 * the two prioqs is the time window we can freely schedule
177 /* For each priority we maintain one signal fd, so that we
178 * only have to dequeue a single event per priority at a
184 sd_event_source
*current
;
196 /* timerfd_create() only supports these five clocks so far. We
197 * can add support for more clocks when the kernel learns to
198 * deal with them, too. */
199 struct clock_data realtime
;
200 struct clock_data boottime
;
201 struct clock_data monotonic
;
202 struct clock_data realtime_alarm
;
203 struct clock_data boottime_alarm
;
207 sd_event_source
**signal_sources
; /* indexed by signal number */
208 Hashmap
*signal_data
; /* indexed by priority */
210 Hashmap
*child_sources
;
211 unsigned n_enabled_child_sources
;
220 triple_timestamp timestamp
;
223 bool exit_requested
:1;
224 bool need_process_child
:1;
226 bool profile_delays
:1;
231 sd_event
**default_event_ptr
;
233 usec_t watchdog_last
, watchdog_period
;
237 LIST_HEAD(sd_event_source
, sources
);
239 usec_t last_run
, last_log
;
240 unsigned delays
[sizeof(usec_t
) * 8];
243 static void source_disconnect(sd_event_source
*s
);
245 static int pending_prioq_compare(const void *a
, const void *b
) {
246 const sd_event_source
*x
= a
, *y
= b
;
251 /* Enabled ones first */
252 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
254 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
257 /* Lower priority values first */
258 if (x
->priority
< y
->priority
)
260 if (x
->priority
> y
->priority
)
263 /* Older entries first */
264 if (x
->pending_iteration
< y
->pending_iteration
)
266 if (x
->pending_iteration
> y
->pending_iteration
)
272 static int prepare_prioq_compare(const void *a
, const void *b
) {
273 const sd_event_source
*x
= a
, *y
= b
;
278 /* Enabled ones first */
279 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
281 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
284 /* Move most recently prepared ones last, so that we can stop
285 * preparing as soon as we hit one that has already been
286 * prepared in the current iteration */
287 if (x
->prepare_iteration
< y
->prepare_iteration
)
289 if (x
->prepare_iteration
> y
->prepare_iteration
)
292 /* Lower priority values first */
293 if (x
->priority
< y
->priority
)
295 if (x
->priority
> y
->priority
)
301 static int earliest_time_prioq_compare(const void *a
, const void *b
) {
302 const sd_event_source
*x
= a
, *y
= b
;
304 assert(EVENT_SOURCE_IS_TIME(x
->type
));
305 assert(x
->type
== y
->type
);
307 /* Enabled ones first */
308 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
310 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
313 /* Move the pending ones to the end */
314 if (!x
->pending
&& y
->pending
)
316 if (x
->pending
&& !y
->pending
)
320 if (x
->time
.next
< y
->time
.next
)
322 if (x
->time
.next
> y
->time
.next
)
328 static usec_t
time_event_source_latest(const sd_event_source
*s
) {
329 return usec_add(s
->time
.next
, s
->time
.accuracy
);
332 static int latest_time_prioq_compare(const void *a
, const void *b
) {
333 const sd_event_source
*x
= a
, *y
= b
;
335 assert(EVENT_SOURCE_IS_TIME(x
->type
));
336 assert(x
->type
== y
->type
);
338 /* Enabled ones first */
339 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
341 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
344 /* Move the pending ones to the end */
345 if (!x
->pending
&& y
->pending
)
347 if (x
->pending
&& !y
->pending
)
351 if (time_event_source_latest(x
) < time_event_source_latest(y
))
353 if (time_event_source_latest(x
) > time_event_source_latest(y
))
359 static int exit_prioq_compare(const void *a
, const void *b
) {
360 const sd_event_source
*x
= a
, *y
= b
;
362 assert(x
->type
== SOURCE_EXIT
);
363 assert(y
->type
== SOURCE_EXIT
);
365 /* Enabled ones first */
366 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
368 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
371 /* Lower priority values first */
372 if (x
->priority
< y
->priority
)
374 if (x
->priority
> y
->priority
)
380 static void free_clock_data(struct clock_data
*d
) {
382 assert(d
->wakeup
== WAKEUP_CLOCK_DATA
);
385 prioq_free(d
->earliest
);
386 prioq_free(d
->latest
);
389 static void event_free(sd_event
*e
) {
394 while ((s
= e
->sources
)) {
396 source_disconnect(s
);
397 sd_event_source_unref(s
);
400 assert(e
->n_sources
== 0);
402 if (e
->default_event_ptr
)
403 *(e
->default_event_ptr
) = NULL
;
405 safe_close(e
->epoll_fd
);
406 safe_close(e
->watchdog_fd
);
408 free_clock_data(&e
->realtime
);
409 free_clock_data(&e
->boottime
);
410 free_clock_data(&e
->monotonic
);
411 free_clock_data(&e
->realtime_alarm
);
412 free_clock_data(&e
->boottime_alarm
);
414 prioq_free(e
->pending
);
415 prioq_free(e
->prepare
);
418 free(e
->signal_sources
);
419 hashmap_free(e
->signal_data
);
421 hashmap_free(e
->child_sources
);
422 set_free(e
->post_sources
);
426 _public_
int sd_event_new(sd_event
** ret
) {
430 assert_return(ret
, -EINVAL
);
432 e
= new0(sd_event
, 1);
437 e
->watchdog_fd
= e
->epoll_fd
= e
->realtime
.fd
= e
->boottime
.fd
= e
->monotonic
.fd
= e
->realtime_alarm
.fd
= e
->boottime_alarm
.fd
= -1;
438 e
->realtime
.next
= e
->boottime
.next
= e
->monotonic
.next
= e
->realtime_alarm
.next
= e
->boottime_alarm
.next
= USEC_INFINITY
;
439 e
->realtime
.wakeup
= e
->boottime
.wakeup
= e
->monotonic
.wakeup
= e
->realtime_alarm
.wakeup
= e
->boottime_alarm
.wakeup
= WAKEUP_CLOCK_DATA
;
440 e
->original_pid
= getpid_cached();
441 e
->perturb
= USEC_INFINITY
;
443 r
= prioq_ensure_allocated(&e
->pending
, pending_prioq_compare
);
447 e
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
448 if (e
->epoll_fd
< 0) {
453 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
454 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
455 e
->profile_delays
= true;
466 _public_ sd_event
* sd_event_ref(sd_event
*e
) {
471 assert(e
->n_ref
>= 1);
477 _public_ sd_event
* sd_event_unref(sd_event
*e
) {
482 assert(e
->n_ref
>= 1);
491 static bool event_pid_changed(sd_event
*e
) {
494 /* We don't support people creating an event loop and keeping
495 * it around over a fork(). Let's complain. */
497 return e
->original_pid
!= getpid_cached();
500 static void source_io_unregister(sd_event_source
*s
) {
504 assert(s
->type
== SOURCE_IO
);
506 if (event_pid_changed(s
->event
))
509 if (!s
->io
.registered
)
512 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, s
->io
.fd
, NULL
);
514 log_debug_errno(errno
, "Failed to remove source %s (type %s) from epoll: %m",
515 strna(s
->description
), event_source_type_to_string(s
->type
));
517 s
->io
.registered
= false;
520 static int source_io_register(
525 struct epoll_event ev
= {};
529 assert(s
->type
== SOURCE_IO
);
530 assert(enabled
!= SD_EVENT_OFF
);
535 if (enabled
== SD_EVENT_ONESHOT
)
536 ev
.events
|= EPOLLONESHOT
;
538 if (s
->io
.registered
)
539 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_MOD
, s
->io
.fd
, &ev
);
541 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_ADD
, s
->io
.fd
, &ev
);
545 s
->io
.registered
= true;
550 static clockid_t
event_source_type_to_clock(EventSourceType t
) {
554 case SOURCE_TIME_REALTIME
:
555 return CLOCK_REALTIME
;
557 case SOURCE_TIME_BOOTTIME
:
558 return CLOCK_BOOTTIME
;
560 case SOURCE_TIME_MONOTONIC
:
561 return CLOCK_MONOTONIC
;
563 case SOURCE_TIME_REALTIME_ALARM
:
564 return CLOCK_REALTIME_ALARM
;
566 case SOURCE_TIME_BOOTTIME_ALARM
:
567 return CLOCK_BOOTTIME_ALARM
;
570 return (clockid_t
) -1;
574 static EventSourceType
clock_to_event_source_type(clockid_t clock
) {
579 return SOURCE_TIME_REALTIME
;
582 return SOURCE_TIME_BOOTTIME
;
584 case CLOCK_MONOTONIC
:
585 return SOURCE_TIME_MONOTONIC
;
587 case CLOCK_REALTIME_ALARM
:
588 return SOURCE_TIME_REALTIME_ALARM
;
590 case CLOCK_BOOTTIME_ALARM
:
591 return SOURCE_TIME_BOOTTIME_ALARM
;
594 return _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
598 static struct clock_data
* event_get_clock_data(sd_event
*e
, EventSourceType t
) {
603 case SOURCE_TIME_REALTIME
:
606 case SOURCE_TIME_BOOTTIME
:
609 case SOURCE_TIME_MONOTONIC
:
610 return &e
->monotonic
;
612 case SOURCE_TIME_REALTIME_ALARM
:
613 return &e
->realtime_alarm
;
615 case SOURCE_TIME_BOOTTIME_ALARM
:
616 return &e
->boottime_alarm
;
623 static int event_make_signal_data(
626 struct signal_data
**ret
) {
628 struct epoll_event ev
= {};
629 struct signal_data
*d
;
637 if (event_pid_changed(e
))
640 if (e
->signal_sources
&& e
->signal_sources
[sig
])
641 priority
= e
->signal_sources
[sig
]->priority
;
645 d
= hashmap_get(e
->signal_data
, &priority
);
647 if (sigismember(&d
->sigset
, sig
) > 0) {
653 r
= hashmap_ensure_allocated(&e
->signal_data
, &uint64_hash_ops
);
657 d
= new0(struct signal_data
, 1);
661 d
->wakeup
= WAKEUP_SIGNAL_DATA
;
663 d
->priority
= priority
;
665 r
= hashmap_put(e
->signal_data
, &d
->priority
, d
);
675 assert_se(sigaddset(&ss_copy
, sig
) >= 0);
677 r
= signalfd(d
->fd
, &ss_copy
, SFD_NONBLOCK
|SFD_CLOEXEC
);
696 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, d
->fd
, &ev
);
709 d
->fd
= safe_close(d
->fd
);
710 hashmap_remove(e
->signal_data
, &d
->priority
);
717 static void event_unmask_signal_data(sd_event
*e
, struct signal_data
*d
, int sig
) {
721 /* Turns off the specified signal in the signal data
722 * object. If the signal mask of the object becomes empty that
725 if (sigismember(&d
->sigset
, sig
) == 0)
728 assert_se(sigdelset(&d
->sigset
, sig
) >= 0);
730 if (sigisemptyset(&d
->sigset
)) {
732 /* If all the mask is all-zero we can get rid of the structure */
733 hashmap_remove(e
->signal_data
, &d
->priority
);
741 if (signalfd(d
->fd
, &d
->sigset
, SFD_NONBLOCK
|SFD_CLOEXEC
) < 0)
742 log_debug_errno(errno
, "Failed to unset signal bit, ignoring: %m");
745 static void event_gc_signal_data(sd_event
*e
, const int64_t *priority
, int sig
) {
746 struct signal_data
*d
;
747 static const int64_t zero_priority
= 0;
751 /* Rechecks if the specified signal is still something we are
752 * interested in. If not, we'll unmask it, and possibly drop
753 * the signalfd for it. */
755 if (sig
== SIGCHLD
&&
756 e
->n_enabled_child_sources
> 0)
759 if (e
->signal_sources
&&
760 e
->signal_sources
[sig
] &&
761 e
->signal_sources
[sig
]->enabled
!= SD_EVENT_OFF
)
765 * The specified signal might be enabled in three different queues:
767 * 1) the one that belongs to the priority passed (if it is non-NULL)
768 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
769 * 3) the 0 priority (to cover the SIGCHLD case)
771 * Hence, let's remove it from all three here.
775 d
= hashmap_get(e
->signal_data
, priority
);
777 event_unmask_signal_data(e
, d
, sig
);
780 if (e
->signal_sources
&& e
->signal_sources
[sig
]) {
781 d
= hashmap_get(e
->signal_data
, &e
->signal_sources
[sig
]->priority
);
783 event_unmask_signal_data(e
, d
, sig
);
786 d
= hashmap_get(e
->signal_data
, &zero_priority
);
788 event_unmask_signal_data(e
, d
, sig
);
791 static void source_disconnect(sd_event_source
*s
) {
799 assert(s
->event
->n_sources
> 0);
805 source_io_unregister(s
);
809 case SOURCE_TIME_REALTIME
:
810 case SOURCE_TIME_BOOTTIME
:
811 case SOURCE_TIME_MONOTONIC
:
812 case SOURCE_TIME_REALTIME_ALARM
:
813 case SOURCE_TIME_BOOTTIME_ALARM
: {
814 struct clock_data
*d
;
816 d
= event_get_clock_data(s
->event
, s
->type
);
819 prioq_remove(d
->earliest
, s
, &s
->time
.earliest_index
);
820 prioq_remove(d
->latest
, s
, &s
->time
.latest_index
);
821 d
->needs_rearm
= true;
826 if (s
->signal
.sig
> 0) {
828 if (s
->event
->signal_sources
)
829 s
->event
->signal_sources
[s
->signal
.sig
] = NULL
;
831 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
837 if (s
->child
.pid
> 0) {
838 if (s
->enabled
!= SD_EVENT_OFF
) {
839 assert(s
->event
->n_enabled_child_sources
> 0);
840 s
->event
->n_enabled_child_sources
--;
843 (void) hashmap_remove(s
->event
->child_sources
, PID_TO_PTR(s
->child
.pid
));
844 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
854 set_remove(s
->event
->post_sources
, s
);
858 prioq_remove(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
862 assert_not_reached("Wut? I shouldn't exist.");
866 prioq_remove(s
->event
->pending
, s
, &s
->pending_index
);
869 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
873 s
->type
= _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
875 LIST_REMOVE(sources
, event
->sources
, s
);
879 sd_event_unref(event
);
882 static void source_free(sd_event_source
*s
) {
885 source_disconnect(s
);
886 free(s
->description
);
890 static int source_set_pending(sd_event_source
*s
, bool b
) {
894 assert(s
->type
!= SOURCE_EXIT
);
902 s
->pending_iteration
= s
->event
->iteration
;
904 r
= prioq_put(s
->event
->pending
, s
, &s
->pending_index
);
910 assert_se(prioq_remove(s
->event
->pending
, s
, &s
->pending_index
));
912 if (EVENT_SOURCE_IS_TIME(s
->type
)) {
913 struct clock_data
*d
;
915 d
= event_get_clock_data(s
->event
, s
->type
);
918 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
919 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
920 d
->needs_rearm
= true;
923 if (s
->type
== SOURCE_SIGNAL
&& !b
) {
924 struct signal_data
*d
;
926 d
= hashmap_get(s
->event
->signal_data
, &s
->priority
);
927 if (d
&& d
->current
== s
)
934 static sd_event_source
*source_new(sd_event
*e
, bool floating
, EventSourceType type
) {
939 s
= new0(sd_event_source
, 1);
945 s
->floating
= floating
;
947 s
->pending_index
= s
->prepare_index
= PRIOQ_IDX_NULL
;
952 LIST_PREPEND(sources
, e
->sources
, s
);
958 _public_
int sd_event_add_io(
960 sd_event_source
**ret
,
963 sd_event_io_handler_t callback
,
969 assert_return(e
, -EINVAL
);
970 assert_return(fd
>= 0, -EBADF
);
971 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
972 assert_return(callback
, -EINVAL
);
973 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
974 assert_return(!event_pid_changed(e
), -ECHILD
);
976 s
= source_new(e
, !ret
, SOURCE_IO
);
980 s
->wakeup
= WAKEUP_EVENT_SOURCE
;
982 s
->io
.events
= events
;
983 s
->io
.callback
= callback
;
984 s
->userdata
= userdata
;
985 s
->enabled
= SD_EVENT_ON
;
987 r
= source_io_register(s
, s
->enabled
, events
);
999 static void initialize_perturb(sd_event
*e
) {
1000 sd_id128_t bootid
= {};
1002 /* When we sleep for longer, we try to realign the wakeup to
1003 the same time wihtin each minute/second/250ms, so that
1004 events all across the system can be coalesced into a single
1005 CPU wakeup. However, let's take some system-specific
1006 randomness for this value, so that in a network of systems
1007 with synced clocks timer events are distributed a
1008 bit. Here, we calculate a perturbation usec offset from the
1011 if (_likely_(e
->perturb
!= USEC_INFINITY
))
1014 if (sd_id128_get_boot(&bootid
) >= 0)
1015 e
->perturb
= (bootid
.qwords
[0] ^ bootid
.qwords
[1]) % USEC_PER_MINUTE
;
1018 static int event_setup_timer_fd(
1020 struct clock_data
*d
,
1023 struct epoll_event ev
= {};
1029 if (_likely_(d
->fd
>= 0))
1032 fd
= timerfd_create(clock
, TFD_NONBLOCK
|TFD_CLOEXEC
);
1036 ev
.events
= EPOLLIN
;
1039 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
);
1049 static int time_exit_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
1052 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1055 _public_
int sd_event_add_time(
1057 sd_event_source
**ret
,
1061 sd_event_time_handler_t callback
,
1064 EventSourceType type
;
1066 struct clock_data
*d
;
1069 assert_return(e
, -EINVAL
);
1070 assert_return(accuracy
!= (uint64_t) -1, -EINVAL
);
1071 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1072 assert_return(!event_pid_changed(e
), -ECHILD
);
1074 if (!clock_supported(clock
)) /* Checks whether the kernel supports the clock */
1077 type
= clock_to_event_source_type(clock
); /* checks whether sd-event supports this clock */
1082 callback
= time_exit_callback
;
1084 d
= event_get_clock_data(e
, type
);
1087 r
= prioq_ensure_allocated(&d
->earliest
, earliest_time_prioq_compare
);
1091 r
= prioq_ensure_allocated(&d
->latest
, latest_time_prioq_compare
);
1096 r
= event_setup_timer_fd(e
, d
, clock
);
1101 s
= source_new(e
, !ret
, type
);
1105 s
->time
.next
= usec
;
1106 s
->time
.accuracy
= accuracy
== 0 ? DEFAULT_ACCURACY_USEC
: accuracy
;
1107 s
->time
.callback
= callback
;
1108 s
->time
.earliest_index
= s
->time
.latest_index
= PRIOQ_IDX_NULL
;
1109 s
->userdata
= userdata
;
1110 s
->enabled
= SD_EVENT_ONESHOT
;
1112 d
->needs_rearm
= true;
1114 r
= prioq_put(d
->earliest
, s
, &s
->time
.earliest_index
);
1118 r
= prioq_put(d
->latest
, s
, &s
->time
.latest_index
);
1132 static int signal_exit_callback(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1135 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1138 _public_
int sd_event_add_signal(
1140 sd_event_source
**ret
,
1142 sd_event_signal_handler_t callback
,
1146 struct signal_data
*d
;
1150 assert_return(e
, -EINVAL
);
1151 assert_return(SIGNAL_VALID(sig
), -EINVAL
);
1152 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1153 assert_return(!event_pid_changed(e
), -ECHILD
);
1156 callback
= signal_exit_callback
;
1158 r
= pthread_sigmask(SIG_SETMASK
, NULL
, &ss
);
1162 if (!sigismember(&ss
, sig
))
1165 if (!e
->signal_sources
) {
1166 e
->signal_sources
= new0(sd_event_source
*, _NSIG
);
1167 if (!e
->signal_sources
)
1169 } else if (e
->signal_sources
[sig
])
1172 s
= source_new(e
, !ret
, SOURCE_SIGNAL
);
1176 s
->signal
.sig
= sig
;
1177 s
->signal
.callback
= callback
;
1178 s
->userdata
= userdata
;
1179 s
->enabled
= SD_EVENT_ON
;
1181 e
->signal_sources
[sig
] = s
;
1183 r
= event_make_signal_data(e
, sig
, &d
);
1189 /* Use the signal name as description for the event source by default */
1190 (void) sd_event_source_set_description(s
, signal_to_string(sig
));
1198 _public_
int sd_event_add_child(
1200 sd_event_source
**ret
,
1203 sd_event_child_handler_t callback
,
1209 assert_return(e
, -EINVAL
);
1210 assert_return(pid
> 1, -EINVAL
);
1211 assert_return(!(options
& ~(WEXITED
|WSTOPPED
|WCONTINUED
)), -EINVAL
);
1212 assert_return(options
!= 0, -EINVAL
);
1213 assert_return(callback
, -EINVAL
);
1214 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1215 assert_return(!event_pid_changed(e
), -ECHILD
);
1217 r
= hashmap_ensure_allocated(&e
->child_sources
, NULL
);
1221 if (hashmap_contains(e
->child_sources
, PID_TO_PTR(pid
)))
1224 s
= source_new(e
, !ret
, SOURCE_CHILD
);
1229 s
->child
.options
= options
;
1230 s
->child
.callback
= callback
;
1231 s
->userdata
= userdata
;
1232 s
->enabled
= SD_EVENT_ONESHOT
;
1234 r
= hashmap_put(e
->child_sources
, PID_TO_PTR(pid
), s
);
1240 e
->n_enabled_child_sources
++;
1242 r
= event_make_signal_data(e
, SIGCHLD
, NULL
);
1244 e
->n_enabled_child_sources
--;
1249 e
->need_process_child
= true;
1257 _public_
int sd_event_add_defer(
1259 sd_event_source
**ret
,
1260 sd_event_handler_t callback
,
1266 assert_return(e
, -EINVAL
);
1267 assert_return(callback
, -EINVAL
);
1268 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1269 assert_return(!event_pid_changed(e
), -ECHILD
);
1271 s
= source_new(e
, !ret
, SOURCE_DEFER
);
1275 s
->defer
.callback
= callback
;
1276 s
->userdata
= userdata
;
1277 s
->enabled
= SD_EVENT_ONESHOT
;
1279 r
= source_set_pending(s
, true);
1291 _public_
int sd_event_add_post(
1293 sd_event_source
**ret
,
1294 sd_event_handler_t callback
,
1300 assert_return(e
, -EINVAL
);
1301 assert_return(callback
, -EINVAL
);
1302 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1303 assert_return(!event_pid_changed(e
), -ECHILD
);
1305 r
= set_ensure_allocated(&e
->post_sources
, NULL
);
1309 s
= source_new(e
, !ret
, SOURCE_POST
);
1313 s
->post
.callback
= callback
;
1314 s
->userdata
= userdata
;
1315 s
->enabled
= SD_EVENT_ON
;
1317 r
= set_put(e
->post_sources
, s
);
1329 _public_
int sd_event_add_exit(
1331 sd_event_source
**ret
,
1332 sd_event_handler_t callback
,
1338 assert_return(e
, -EINVAL
);
1339 assert_return(callback
, -EINVAL
);
1340 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1341 assert_return(!event_pid_changed(e
), -ECHILD
);
1343 r
= prioq_ensure_allocated(&e
->exit
, exit_prioq_compare
);
1347 s
= source_new(e
, !ret
, SOURCE_EXIT
);
1351 s
->exit
.callback
= callback
;
1352 s
->userdata
= userdata
;
1353 s
->exit
.prioq_index
= PRIOQ_IDX_NULL
;
1354 s
->enabled
= SD_EVENT_ONESHOT
;
1356 r
= prioq_put(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1368 _public_ sd_event_source
* sd_event_source_ref(sd_event_source
*s
) {
1373 assert(s
->n_ref
>= 1);
1379 _public_ sd_event_source
* sd_event_source_unref(sd_event_source
*s
) {
1384 assert(s
->n_ref
>= 1);
1387 if (s
->n_ref
<= 0) {
1388 /* Here's a special hack: when we are called from a
1389 * dispatch handler we won't free the event source
1390 * immediately, but we will detach the fd from the
1391 * epoll. This way it is safe for the caller to unref
1392 * the event source and immediately close the fd, but
1393 * we still retain a valid event source object after
1396 if (s
->dispatching
) {
1397 if (s
->type
== SOURCE_IO
)
1398 source_io_unregister(s
);
1400 source_disconnect(s
);
1408 _public_
int sd_event_source_set_description(sd_event_source
*s
, const char *description
) {
1409 assert_return(s
, -EINVAL
);
1410 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1412 return free_and_strdup(&s
->description
, description
);
1415 _public_
int sd_event_source_get_description(sd_event_source
*s
, const char **description
) {
1416 assert_return(s
, -EINVAL
);
1417 assert_return(description
, -EINVAL
);
1418 assert_return(s
->description
, -ENXIO
);
1419 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1421 *description
= s
->description
;
1425 _public_ sd_event
*sd_event_source_get_event(sd_event_source
*s
) {
1426 assert_return(s
, NULL
);
1431 _public_
int sd_event_source_get_pending(sd_event_source
*s
) {
1432 assert_return(s
, -EINVAL
);
1433 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1434 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1435 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1440 _public_
int sd_event_source_get_io_fd(sd_event_source
*s
) {
1441 assert_return(s
, -EINVAL
);
1442 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1443 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1448 _public_
int sd_event_source_set_io_fd(sd_event_source
*s
, int fd
) {
1451 assert_return(s
, -EINVAL
);
1452 assert_return(fd
>= 0, -EBADF
);
1453 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1454 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1459 if (s
->enabled
== SD_EVENT_OFF
) {
1461 s
->io
.registered
= false;
1465 saved_fd
= s
->io
.fd
;
1466 assert(s
->io
.registered
);
1469 s
->io
.registered
= false;
1471 r
= source_io_register(s
, s
->enabled
, s
->io
.events
);
1473 s
->io
.fd
= saved_fd
;
1474 s
->io
.registered
= true;
1478 epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, saved_fd
, NULL
);
1484 _public_
int sd_event_source_get_io_events(sd_event_source
*s
, uint32_t* events
) {
1485 assert_return(s
, -EINVAL
);
1486 assert_return(events
, -EINVAL
);
1487 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1488 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1490 *events
= s
->io
.events
;
1494 _public_
int sd_event_source_set_io_events(sd_event_source
*s
, uint32_t events
) {
1497 assert_return(s
, -EINVAL
);
1498 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1499 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
1500 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1501 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1503 /* edge-triggered updates are never skipped, so we can reset edges */
1504 if (s
->io
.events
== events
&& !(events
& EPOLLET
))
1507 if (s
->enabled
!= SD_EVENT_OFF
) {
1508 r
= source_io_register(s
, s
->enabled
, events
);
1513 s
->io
.events
= events
;
1514 source_set_pending(s
, false);
1519 _public_
int sd_event_source_get_io_revents(sd_event_source
*s
, uint32_t* revents
) {
1520 assert_return(s
, -EINVAL
);
1521 assert_return(revents
, -EINVAL
);
1522 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1523 assert_return(s
->pending
, -ENODATA
);
1524 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1526 *revents
= s
->io
.revents
;
1530 _public_
int sd_event_source_get_signal(sd_event_source
*s
) {
1531 assert_return(s
, -EINVAL
);
1532 assert_return(s
->type
== SOURCE_SIGNAL
, -EDOM
);
1533 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1535 return s
->signal
.sig
;
1538 _public_
int sd_event_source_get_priority(sd_event_source
*s
, int64_t *priority
) {
1539 assert_return(s
, -EINVAL
);
1540 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1542 *priority
= s
->priority
;
1546 _public_
int sd_event_source_set_priority(sd_event_source
*s
, int64_t priority
) {
1549 assert_return(s
, -EINVAL
);
1550 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1551 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1553 if (s
->priority
== priority
)
1556 if (s
->type
== SOURCE_SIGNAL
&& s
->enabled
!= SD_EVENT_OFF
) {
1557 struct signal_data
*old
, *d
;
1559 /* Move us from the signalfd belonging to the old
1560 * priority to the signalfd of the new priority */
1562 assert_se(old
= hashmap_get(s
->event
->signal_data
, &s
->priority
));
1564 s
->priority
= priority
;
1566 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, &d
);
1568 s
->priority
= old
->priority
;
1572 event_unmask_signal_data(s
->event
, old
, s
->signal
.sig
);
1574 s
->priority
= priority
;
1577 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1580 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1582 if (s
->type
== SOURCE_EXIT
)
1583 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1588 _public_
int sd_event_source_get_enabled(sd_event_source
*s
, int *m
) {
1589 assert_return(s
, -EINVAL
);
1590 assert_return(m
, -EINVAL
);
1591 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1597 _public_
int sd_event_source_set_enabled(sd_event_source
*s
, int m
) {
1600 assert_return(s
, -EINVAL
);
1601 assert_return(IN_SET(m
, SD_EVENT_OFF
, SD_EVENT_ON
, SD_EVENT_ONESHOT
), -EINVAL
);
1602 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1604 /* If we are dead anyway, we are fine with turning off
1605 * sources, but everything else needs to fail. */
1606 if (s
->event
->state
== SD_EVENT_FINISHED
)
1607 return m
== SD_EVENT_OFF
? 0 : -ESTALE
;
1609 if (s
->enabled
== m
)
1612 if (m
== SD_EVENT_OFF
) {
1617 source_io_unregister(s
);
1621 case SOURCE_TIME_REALTIME
:
1622 case SOURCE_TIME_BOOTTIME
:
1623 case SOURCE_TIME_MONOTONIC
:
1624 case SOURCE_TIME_REALTIME_ALARM
:
1625 case SOURCE_TIME_BOOTTIME_ALARM
: {
1626 struct clock_data
*d
;
1629 d
= event_get_clock_data(s
->event
, s
->type
);
1632 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1633 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1634 d
->needs_rearm
= true;
1641 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1647 assert(s
->event
->n_enabled_child_sources
> 0);
1648 s
->event
->n_enabled_child_sources
--;
1650 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1655 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1664 assert_not_reached("Wut? I shouldn't exist.");
1671 r
= source_io_register(s
, m
, s
->io
.events
);
1678 case SOURCE_TIME_REALTIME
:
1679 case SOURCE_TIME_BOOTTIME
:
1680 case SOURCE_TIME_MONOTONIC
:
1681 case SOURCE_TIME_REALTIME_ALARM
:
1682 case SOURCE_TIME_BOOTTIME_ALARM
: {
1683 struct clock_data
*d
;
1686 d
= event_get_clock_data(s
->event
, s
->type
);
1689 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1690 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1691 d
->needs_rearm
= true;
1699 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, NULL
);
1701 s
->enabled
= SD_EVENT_OFF
;
1702 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1710 if (s
->enabled
== SD_EVENT_OFF
)
1711 s
->event
->n_enabled_child_sources
++;
1715 r
= event_make_signal_data(s
->event
, SIGCHLD
, NULL
);
1717 s
->enabled
= SD_EVENT_OFF
;
1718 s
->event
->n_enabled_child_sources
--;
1719 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1727 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1736 assert_not_reached("Wut? I shouldn't exist.");
1741 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1744 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1749 _public_
int sd_event_source_get_time(sd_event_source
*s
, uint64_t *usec
) {
1750 assert_return(s
, -EINVAL
);
1751 assert_return(usec
, -EINVAL
);
1752 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1753 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1755 *usec
= s
->time
.next
;
1759 _public_
int sd_event_source_set_time(sd_event_source
*s
, uint64_t usec
) {
1760 struct clock_data
*d
;
1762 assert_return(s
, -EINVAL
);
1763 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1764 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1765 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1767 s
->time
.next
= usec
;
1769 source_set_pending(s
, false);
1771 d
= event_get_clock_data(s
->event
, s
->type
);
1774 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1775 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1776 d
->needs_rearm
= true;
1781 _public_
int sd_event_source_get_time_accuracy(sd_event_source
*s
, uint64_t *usec
) {
1782 assert_return(s
, -EINVAL
);
1783 assert_return(usec
, -EINVAL
);
1784 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1785 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1787 *usec
= s
->time
.accuracy
;
1791 _public_
int sd_event_source_set_time_accuracy(sd_event_source
*s
, uint64_t usec
) {
1792 struct clock_data
*d
;
1794 assert_return(s
, -EINVAL
);
1795 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1796 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1797 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1798 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1801 usec
= DEFAULT_ACCURACY_USEC
;
1803 s
->time
.accuracy
= usec
;
1805 source_set_pending(s
, false);
1807 d
= event_get_clock_data(s
->event
, s
->type
);
1810 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1811 d
->needs_rearm
= true;
1816 _public_
int sd_event_source_get_time_clock(sd_event_source
*s
, clockid_t
*clock
) {
1817 assert_return(s
, -EINVAL
);
1818 assert_return(clock
, -EINVAL
);
1819 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1820 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1822 *clock
= event_source_type_to_clock(s
->type
);
1826 _public_
int sd_event_source_get_child_pid(sd_event_source
*s
, pid_t
*pid
) {
1827 assert_return(s
, -EINVAL
);
1828 assert_return(pid
, -EINVAL
);
1829 assert_return(s
->type
== SOURCE_CHILD
, -EDOM
);
1830 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1832 *pid
= s
->child
.pid
;
1836 _public_
int sd_event_source_set_prepare(sd_event_source
*s
, sd_event_handler_t callback
) {
1839 assert_return(s
, -EINVAL
);
1840 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1841 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1842 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1844 if (s
->prepare
== callback
)
1847 if (callback
&& s
->prepare
) {
1848 s
->prepare
= callback
;
1852 r
= prioq_ensure_allocated(&s
->event
->prepare
, prepare_prioq_compare
);
1856 s
->prepare
= callback
;
1859 r
= prioq_put(s
->event
->prepare
, s
, &s
->prepare_index
);
1863 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
1868 _public_
void* sd_event_source_get_userdata(sd_event_source
*s
) {
1869 assert_return(s
, NULL
);
1874 _public_
void *sd_event_source_set_userdata(sd_event_source
*s
, void *userdata
) {
1877 assert_return(s
, NULL
);
1880 s
->userdata
= userdata
;
1885 static usec_t
sleep_between(sd_event
*e
, usec_t a
, usec_t b
) {
1892 if (a
>= USEC_INFINITY
)
1893 return USEC_INFINITY
;
1898 initialize_perturb(e
);
1901 Find a good time to wake up again between times a and b. We
1902 have two goals here:
1904 a) We want to wake up as seldom as possible, hence prefer
1905 later times over earlier times.
1907 b) But if we have to wake up, then let's make sure to
1908 dispatch as much as possible on the entire system.
1910 We implement this by waking up everywhere at the same time
1911 within any given minute if we can, synchronised via the
1912 perturbation value determined from the boot ID. If we can't,
1913 then we try to find the same spot in every 10s, then 1s and
1914 then 250ms step. Otherwise, we pick the last possible time
1918 c
= (b
/ USEC_PER_MINUTE
) * USEC_PER_MINUTE
+ e
->perturb
;
1920 if (_unlikely_(c
< USEC_PER_MINUTE
))
1923 c
-= USEC_PER_MINUTE
;
1929 c
= (b
/ (USEC_PER_SEC
*10)) * (USEC_PER_SEC
*10) + (e
->perturb
% (USEC_PER_SEC
*10));
1931 if (_unlikely_(c
< USEC_PER_SEC
*10))
1934 c
-= USEC_PER_SEC
*10;
1940 c
= (b
/ USEC_PER_SEC
) * USEC_PER_SEC
+ (e
->perturb
% USEC_PER_SEC
);
1942 if (_unlikely_(c
< USEC_PER_SEC
))
1951 c
= (b
/ (USEC_PER_MSEC
*250)) * (USEC_PER_MSEC
*250) + (e
->perturb
% (USEC_PER_MSEC
*250));
1953 if (_unlikely_(c
< USEC_PER_MSEC
*250))
1956 c
-= USEC_PER_MSEC
*250;
1965 static int event_arm_timer(
1967 struct clock_data
*d
) {
1969 struct itimerspec its
= {};
1970 sd_event_source
*a
, *b
;
1977 if (!d
->needs_rearm
)
1980 d
->needs_rearm
= false;
1982 a
= prioq_peek(d
->earliest
);
1983 if (!a
|| a
->enabled
== SD_EVENT_OFF
|| a
->time
.next
== USEC_INFINITY
) {
1988 if (d
->next
== USEC_INFINITY
)
1992 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
1996 d
->next
= USEC_INFINITY
;
2000 b
= prioq_peek(d
->latest
);
2001 assert_se(b
&& b
->enabled
!= SD_EVENT_OFF
);
2003 t
= sleep_between(e
, a
->time
.next
, time_event_source_latest(b
));
2007 assert_se(d
->fd
>= 0);
2010 /* We don' want to disarm here, just mean some time looooong ago. */
2011 its
.it_value
.tv_sec
= 0;
2012 its
.it_value
.tv_nsec
= 1;
2014 timespec_store(&its
.it_value
, t
);
2016 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2024 static int process_io(sd_event
*e
, sd_event_source
*s
, uint32_t revents
) {
2027 assert(s
->type
== SOURCE_IO
);
2029 /* If the event source was already pending, we just OR in the
2030 * new revents, otherwise we reset the value. The ORing is
2031 * necessary to handle EPOLLONESHOT events properly where
2032 * readability might happen independently of writability, and
2033 * we need to keep track of both */
2036 s
->io
.revents
|= revents
;
2038 s
->io
.revents
= revents
;
2040 return source_set_pending(s
, true);
2043 static int flush_timer(sd_event
*e
, int fd
, uint32_t events
, usec_t
*next
) {
2050 assert_return(events
== EPOLLIN
, -EIO
);
2052 ss
= read(fd
, &x
, sizeof(x
));
2054 if (IN_SET(errno
, EAGAIN
, EINTR
))
2060 if (_unlikely_(ss
!= sizeof(x
)))
2064 *next
= USEC_INFINITY
;
2069 static int process_timer(
2072 struct clock_data
*d
) {
2081 s
= prioq_peek(d
->earliest
);
2084 s
->enabled
== SD_EVENT_OFF
||
2088 r
= source_set_pending(s
, true);
2092 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
2093 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2094 d
->needs_rearm
= true;
2100 static int process_child(sd_event
*e
) {
2107 e
->need_process_child
= false;
2110 So, this is ugly. We iteratively invoke waitid() with P_PID
2111 + WNOHANG for each PID we wait for, instead of using
2112 P_ALL. This is because we only want to get child
2113 information of very specific child processes, and not all
2114 of them. We might not have processed the SIGCHLD even of a
2115 previous invocation and we don't want to maintain a
2116 unbounded *per-child* event queue, hence we really don't
2117 want anything flushed out of the kernel's queue that we
2118 don't care about. Since this is O(n) this means that if you
2119 have a lot of processes you probably want to handle SIGCHLD
2122 We do not reap the children here (by using WNOWAIT), this
2123 is only done after the event source is dispatched so that
2124 the callback still sees the process as a zombie.
2127 HASHMAP_FOREACH(s
, e
->child_sources
, i
) {
2128 assert(s
->type
== SOURCE_CHILD
);
2133 if (s
->enabled
== SD_EVENT_OFF
)
2136 zero(s
->child
.siginfo
);
2137 r
= waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
,
2138 WNOHANG
| (s
->child
.options
& WEXITED
? WNOWAIT
: 0) | s
->child
.options
);
2142 if (s
->child
.siginfo
.si_pid
!= 0) {
2143 bool zombie
= IN_SET(s
->child
.siginfo
.si_code
, CLD_EXITED
, CLD_KILLED
, CLD_DUMPED
);
2145 if (!zombie
&& (s
->child
.options
& WEXITED
)) {
2146 /* If the child isn't dead then let's
2147 * immediately remove the state change
2148 * from the queue, since there's no
2149 * benefit in leaving it queued */
2151 assert(s
->child
.options
& (WSTOPPED
|WCONTINUED
));
2152 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|(s
->child
.options
& (WSTOPPED
|WCONTINUED
)));
2155 r
= source_set_pending(s
, true);
2164 static int process_signal(sd_event
*e
, struct signal_data
*d
, uint32_t events
) {
2165 bool read_one
= false;
2169 assert_return(events
== EPOLLIN
, -EIO
);
2171 /* If there's a signal queued on this priority and SIGCHLD is
2172 on this priority too, then make sure to recheck the
2173 children we watch. This is because we only ever dequeue
2174 the first signal per priority, and if we dequeue one, and
2175 SIGCHLD might be enqueued later we wouldn't know, but we
2176 might have higher priority children we care about hence we
2177 need to check that explicitly. */
2179 if (sigismember(&d
->sigset
, SIGCHLD
))
2180 e
->need_process_child
= true;
2182 /* If there's already an event source pending for this
2183 * priority we don't read another */
2188 struct signalfd_siginfo si
;
2190 sd_event_source
*s
= NULL
;
2192 n
= read(d
->fd
, &si
, sizeof(si
));
2194 if (IN_SET(errno
, EAGAIN
, EINTR
))
2200 if (_unlikely_(n
!= sizeof(si
)))
2203 assert(SIGNAL_VALID(si
.ssi_signo
));
2207 if (e
->signal_sources
)
2208 s
= e
->signal_sources
[si
.ssi_signo
];
2214 s
->signal
.siginfo
= si
;
2217 r
= source_set_pending(s
, true);
2225 static int source_dispatch(sd_event_source
*s
) {
2226 EventSourceType saved_type
;
2230 assert(s
->pending
|| s
->type
== SOURCE_EXIT
);
2232 /* Save the event source type, here, so that we still know it after the event callback which might invalidate
2234 saved_type
= s
->type
;
2236 if (!IN_SET(s
->type
, SOURCE_DEFER
, SOURCE_EXIT
)) {
2237 r
= source_set_pending(s
, false);
2242 if (s
->type
!= SOURCE_POST
) {
2246 /* If we execute a non-post source, let's mark all
2247 * post sources as pending */
2249 SET_FOREACH(z
, s
->event
->post_sources
, i
) {
2250 if (z
->enabled
== SD_EVENT_OFF
)
2253 r
= source_set_pending(z
, true);
2259 if (s
->enabled
== SD_EVENT_ONESHOT
) {
2260 r
= sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2265 s
->dispatching
= true;
2270 r
= s
->io
.callback(s
, s
->io
.fd
, s
->io
.revents
, s
->userdata
);
2273 case SOURCE_TIME_REALTIME
:
2274 case SOURCE_TIME_BOOTTIME
:
2275 case SOURCE_TIME_MONOTONIC
:
2276 case SOURCE_TIME_REALTIME_ALARM
:
2277 case SOURCE_TIME_BOOTTIME_ALARM
:
2278 r
= s
->time
.callback(s
, s
->time
.next
, s
->userdata
);
2282 r
= s
->signal
.callback(s
, &s
->signal
.siginfo
, s
->userdata
);
2285 case SOURCE_CHILD
: {
2288 zombie
= IN_SET(s
->child
.siginfo
.si_code
, CLD_EXITED
, CLD_KILLED
, CLD_DUMPED
);
2290 r
= s
->child
.callback(s
, &s
->child
.siginfo
, s
->userdata
);
2292 /* Now, reap the PID for good. */
2294 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|WEXITED
);
2300 r
= s
->defer
.callback(s
, s
->userdata
);
2304 r
= s
->post
.callback(s
, s
->userdata
);
2308 r
= s
->exit
.callback(s
, s
->userdata
);
2311 case SOURCE_WATCHDOG
:
2312 case _SOURCE_EVENT_SOURCE_TYPE_MAX
:
2313 case _SOURCE_EVENT_SOURCE_TYPE_INVALID
:
2314 assert_not_reached("Wut? I shouldn't exist.");
2317 s
->dispatching
= false;
2320 log_debug_errno(r
, "Event source %s (type %s) returned error, disabling: %m",
2321 strna(s
->description
), event_source_type_to_string(saved_type
));
2326 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2331 static int event_prepare(sd_event
*e
) {
2339 s
= prioq_peek(e
->prepare
);
2340 if (!s
|| s
->prepare_iteration
== e
->iteration
|| s
->enabled
== SD_EVENT_OFF
)
2343 s
->prepare_iteration
= e
->iteration
;
2344 r
= prioq_reshuffle(e
->prepare
, s
, &s
->prepare_index
);
2350 s
->dispatching
= true;
2351 r
= s
->prepare(s
, s
->userdata
);
2352 s
->dispatching
= false;
2355 log_debug_errno(r
, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2356 strna(s
->description
), event_source_type_to_string(s
->type
));
2361 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2367 static int dispatch_exit(sd_event
*e
) {
2373 p
= prioq_peek(e
->exit
);
2374 if (!p
|| p
->enabled
== SD_EVENT_OFF
) {
2375 e
->state
= SD_EVENT_FINISHED
;
2381 e
->state
= SD_EVENT_EXITING
;
2383 r
= source_dispatch(p
);
2385 e
->state
= SD_EVENT_INITIAL
;
2391 static sd_event_source
* event_next_pending(sd_event
*e
) {
2396 p
= prioq_peek(e
->pending
);
2400 if (p
->enabled
== SD_EVENT_OFF
)
2406 static int arm_watchdog(sd_event
*e
) {
2407 struct itimerspec its
= {};
2412 assert(e
->watchdog_fd
>= 0);
2414 t
= sleep_between(e
,
2415 e
->watchdog_last
+ (e
->watchdog_period
/ 2),
2416 e
->watchdog_last
+ (e
->watchdog_period
* 3 / 4));
2418 timespec_store(&its
.it_value
, t
);
2420 /* Make sure we never set the watchdog to 0, which tells the
2421 * kernel to disable it. */
2422 if (its
.it_value
.tv_sec
== 0 && its
.it_value
.tv_nsec
== 0)
2423 its
.it_value
.tv_nsec
= 1;
2425 r
= timerfd_settime(e
->watchdog_fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2432 static int process_watchdog(sd_event
*e
) {
2438 /* Don't notify watchdog too often */
2439 if (e
->watchdog_last
+ e
->watchdog_period
/ 4 > e
->timestamp
.monotonic
)
2442 sd_notify(false, "WATCHDOG=1");
2443 e
->watchdog_last
= e
->timestamp
.monotonic
;
2445 return arm_watchdog(e
);
2448 _public_
int sd_event_prepare(sd_event
*e
) {
2451 assert_return(e
, -EINVAL
);
2452 assert_return(!event_pid_changed(e
), -ECHILD
);
2453 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2454 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2456 if (e
->exit_requested
)
2461 e
->state
= SD_EVENT_PREPARING
;
2462 r
= event_prepare(e
);
2463 e
->state
= SD_EVENT_INITIAL
;
2467 r
= event_arm_timer(e
, &e
->realtime
);
2471 r
= event_arm_timer(e
, &e
->boottime
);
2475 r
= event_arm_timer(e
, &e
->monotonic
);
2479 r
= event_arm_timer(e
, &e
->realtime_alarm
);
2483 r
= event_arm_timer(e
, &e
->boottime_alarm
);
2487 if (event_next_pending(e
) || e
->need_process_child
)
2490 e
->state
= SD_EVENT_ARMED
;
2495 e
->state
= SD_EVENT_ARMED
;
2496 r
= sd_event_wait(e
, 0);
2498 e
->state
= SD_EVENT_ARMED
;
2503 _public_
int sd_event_wait(sd_event
*e
, uint64_t timeout
) {
2504 struct epoll_event
*ev_queue
;
2505 unsigned ev_queue_max
;
2508 assert_return(e
, -EINVAL
);
2509 assert_return(!event_pid_changed(e
), -ECHILD
);
2510 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2511 assert_return(e
->state
== SD_EVENT_ARMED
, -EBUSY
);
2513 if (e
->exit_requested
) {
2514 e
->state
= SD_EVENT_PENDING
;
2518 ev_queue_max
= MAX(e
->n_sources
, 1u);
2519 ev_queue
= newa(struct epoll_event
, ev_queue_max
);
2521 m
= epoll_wait(e
->epoll_fd
, ev_queue
, ev_queue_max
,
2522 timeout
== (uint64_t) -1 ? -1 : (int) ((timeout
+ USEC_PER_MSEC
- 1) / USEC_PER_MSEC
));
2524 if (errno
== EINTR
) {
2525 e
->state
= SD_EVENT_PENDING
;
2533 triple_timestamp_get(&e
->timestamp
);
2535 for (i
= 0; i
< m
; i
++) {
2537 if (ev_queue
[i
].data
.ptr
== INT_TO_PTR(SOURCE_WATCHDOG
))
2538 r
= flush_timer(e
, e
->watchdog_fd
, ev_queue
[i
].events
, NULL
);
2540 WakeupType
*t
= ev_queue
[i
].data
.ptr
;
2544 case WAKEUP_EVENT_SOURCE
:
2545 r
= process_io(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
2548 case WAKEUP_CLOCK_DATA
: {
2549 struct clock_data
*d
= ev_queue
[i
].data
.ptr
;
2550 r
= flush_timer(e
, d
->fd
, ev_queue
[i
].events
, &d
->next
);
2554 case WAKEUP_SIGNAL_DATA
:
2555 r
= process_signal(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
2559 assert_not_reached("Invalid wake-up pointer");
2566 r
= process_watchdog(e
);
2570 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime
);
2574 r
= process_timer(e
, e
->timestamp
.boottime
, &e
->boottime
);
2578 r
= process_timer(e
, e
->timestamp
.monotonic
, &e
->monotonic
);
2582 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime_alarm
);
2586 r
= process_timer(e
, e
->timestamp
.boottime
, &e
->boottime_alarm
);
2590 if (e
->need_process_child
) {
2591 r
= process_child(e
);
2596 if (event_next_pending(e
)) {
2597 e
->state
= SD_EVENT_PENDING
;
2605 e
->state
= SD_EVENT_INITIAL
;
2610 _public_
int sd_event_dispatch(sd_event
*e
) {
2614 assert_return(e
, -EINVAL
);
2615 assert_return(!event_pid_changed(e
), -ECHILD
);
2616 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2617 assert_return(e
->state
== SD_EVENT_PENDING
, -EBUSY
);
2619 if (e
->exit_requested
)
2620 return dispatch_exit(e
);
2622 p
= event_next_pending(e
);
2626 e
->state
= SD_EVENT_RUNNING
;
2627 r
= source_dispatch(p
);
2628 e
->state
= SD_EVENT_INITIAL
;
2635 e
->state
= SD_EVENT_INITIAL
;
2640 static void event_log_delays(sd_event
*e
) {
2641 char b
[ELEMENTSOF(e
->delays
) * DECIMAL_STR_MAX(unsigned) + 1];
2645 for (i
= o
= 0; i
< ELEMENTSOF(e
->delays
); i
++) {
2646 o
+= snprintf(&b
[o
], sizeof(b
) - o
, "%u ", e
->delays
[i
]);
2649 log_debug("Event loop iterations: %.*s", o
, b
);
2652 _public_
int sd_event_run(sd_event
*e
, uint64_t timeout
) {
2655 assert_return(e
, -EINVAL
);
2656 assert_return(!event_pid_changed(e
), -ECHILD
);
2657 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2658 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2660 if (e
->profile_delays
&& e
->last_run
) {
2664 this_run
= now(CLOCK_MONOTONIC
);
2666 l
= u64log2(this_run
- e
->last_run
);
2667 assert(l
< sizeof(e
->delays
));
2670 if (this_run
- e
->last_log
>= 5*USEC_PER_SEC
) {
2671 event_log_delays(e
);
2672 e
->last_log
= this_run
;
2676 r
= sd_event_prepare(e
);
2678 /* There was nothing? Then wait... */
2679 r
= sd_event_wait(e
, timeout
);
2681 if (e
->profile_delays
)
2682 e
->last_run
= now(CLOCK_MONOTONIC
);
2685 /* There's something now, then let's dispatch it */
2686 r
= sd_event_dispatch(e
);
2696 _public_
int sd_event_loop(sd_event
*e
) {
2699 assert_return(e
, -EINVAL
);
2700 assert_return(!event_pid_changed(e
), -ECHILD
);
2701 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2705 while (e
->state
!= SD_EVENT_FINISHED
) {
2706 r
= sd_event_run(e
, (uint64_t) -1);
2718 _public_
int sd_event_get_fd(sd_event
*e
) {
2720 assert_return(e
, -EINVAL
);
2721 assert_return(!event_pid_changed(e
), -ECHILD
);
2726 _public_
int sd_event_get_state(sd_event
*e
) {
2727 assert_return(e
, -EINVAL
);
2728 assert_return(!event_pid_changed(e
), -ECHILD
);
2733 _public_
int sd_event_get_exit_code(sd_event
*e
, int *code
) {
2734 assert_return(e
, -EINVAL
);
2735 assert_return(code
, -EINVAL
);
2736 assert_return(!event_pid_changed(e
), -ECHILD
);
2738 if (!e
->exit_requested
)
2741 *code
= e
->exit_code
;
2745 _public_
int sd_event_exit(sd_event
*e
, int code
) {
2746 assert_return(e
, -EINVAL
);
2747 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2748 assert_return(!event_pid_changed(e
), -ECHILD
);
2750 e
->exit_requested
= true;
2751 e
->exit_code
= code
;
2756 _public_
int sd_event_now(sd_event
*e
, clockid_t clock
, uint64_t *usec
) {
2757 assert_return(e
, -EINVAL
);
2758 assert_return(usec
, -EINVAL
);
2759 assert_return(!event_pid_changed(e
), -ECHILD
);
2761 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock
))
2764 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
2765 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
2766 * the purpose of getting the time this doesn't matter. */
2767 if (IN_SET(clock
, CLOCK_BOOTTIME
, CLOCK_BOOTTIME_ALARM
) && !clock_boottime_supported())
2770 if (!triple_timestamp_is_set(&e
->timestamp
)) {
2771 /* Implicitly fall back to now() if we never ran
2772 * before and thus have no cached time. */
2777 *usec
= triple_timestamp_by_clock(&e
->timestamp
, clock
);
2781 _public_
int sd_event_default(sd_event
**ret
) {
2783 static thread_local sd_event
*default_event
= NULL
;
2788 return !!default_event
;
2790 if (default_event
) {
2791 *ret
= sd_event_ref(default_event
);
2795 r
= sd_event_new(&e
);
2799 e
->default_event_ptr
= &default_event
;
2807 _public_
int sd_event_get_tid(sd_event
*e
, pid_t
*tid
) {
2808 assert_return(e
, -EINVAL
);
2809 assert_return(tid
, -EINVAL
);
2810 assert_return(!event_pid_changed(e
), -ECHILD
);
2820 _public_
int sd_event_set_watchdog(sd_event
*e
, int b
) {
2823 assert_return(e
, -EINVAL
);
2824 assert_return(!event_pid_changed(e
), -ECHILD
);
2826 if (e
->watchdog
== !!b
)
2830 struct epoll_event ev
= {};
2832 r
= sd_watchdog_enabled(false, &e
->watchdog_period
);
2836 /* Issue first ping immediately */
2837 sd_notify(false, "WATCHDOG=1");
2838 e
->watchdog_last
= now(CLOCK_MONOTONIC
);
2840 e
->watchdog_fd
= timerfd_create(CLOCK_MONOTONIC
, TFD_NONBLOCK
|TFD_CLOEXEC
);
2841 if (e
->watchdog_fd
< 0)
2844 r
= arm_watchdog(e
);
2848 ev
.events
= EPOLLIN
;
2849 ev
.data
.ptr
= INT_TO_PTR(SOURCE_WATCHDOG
);
2851 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, e
->watchdog_fd
, &ev
);
2858 if (e
->watchdog_fd
>= 0) {
2859 epoll_ctl(e
->epoll_fd
, EPOLL_CTL_DEL
, e
->watchdog_fd
, NULL
);
2860 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
2868 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
2872 _public_
int sd_event_get_watchdog(sd_event
*e
) {
2873 assert_return(e
, -EINVAL
);
2874 assert_return(!event_pid_changed(e
), -ECHILD
);
2879 _public_
int sd_event_get_iteration(sd_event
*e
, uint64_t *ret
) {
2880 assert_return(e
, -EINVAL
);
2881 assert_return(!event_pid_changed(e
), -ECHILD
);
2883 *ret
= e
->iteration
;