/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "fd-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"
#include "util.h"
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
};
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                        bool owned:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
        };
};
struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};
static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by latest permitted dispatch time */
        if (time_event_source_latest(x) < time_event_source_latest(y))
                return -1;
        if (time_event_source_latest(x) > time_event_source_latest(y))
                return 1;

        return 0;
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
396 static void event_free(sd_event
*e
) {
401 while ((s
= e
->sources
)) {
403 source_disconnect(s
);
404 sd_event_source_unref(s
);
407 assert(e
->n_sources
== 0);
409 if (e
->default_event_ptr
)
410 *(e
->default_event_ptr
) = NULL
;
412 safe_close(e
->epoll_fd
);
413 safe_close(e
->watchdog_fd
);
415 free_clock_data(&e
->realtime
);
416 free_clock_data(&e
->boottime
);
417 free_clock_data(&e
->monotonic
);
418 free_clock_data(&e
->realtime_alarm
);
419 free_clock_data(&e
->boottime_alarm
);
421 prioq_free(e
->pending
);
422 prioq_free(e
->prepare
);
425 free(e
->signal_sources
);
426 hashmap_free(e
->signal_data
);
428 hashmap_free(e
->child_sources
);
429 set_free(e
->post_sources
);
433 _public_
int sd_event_new(sd_event
** ret
) {
437 assert_return(ret
, -EINVAL
);
439 e
= new0(sd_event
, 1);
444 e
->watchdog_fd
= e
->epoll_fd
= e
->realtime
.fd
= e
->boottime
.fd
= e
->monotonic
.fd
= e
->realtime_alarm
.fd
= e
->boottime_alarm
.fd
= -1;
445 e
->realtime
.next
= e
->boottime
.next
= e
->monotonic
.next
= e
->realtime_alarm
.next
= e
->boottime_alarm
.next
= USEC_INFINITY
;
446 e
->realtime
.wakeup
= e
->boottime
.wakeup
= e
->monotonic
.wakeup
= e
->realtime_alarm
.wakeup
= e
->boottime_alarm
.wakeup
= WAKEUP_CLOCK_DATA
;
447 e
->original_pid
= getpid_cached();
448 e
->perturb
= USEC_INFINITY
;
450 r
= prioq_ensure_allocated(&e
->pending
, pending_prioq_compare
);
454 e
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
455 if (e
->epoll_fd
< 0) {
460 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
461 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
462 e
->profile_delays
= true;
473 _public_ sd_event
* sd_event_ref(sd_event
*e
) {
478 assert(e
->n_ref
>= 1);
484 _public_ sd_event
* sd_event_unref(sd_event
*e
) {
489 assert(e
->n_ref
>= 1);
498 static bool event_pid_changed(sd_event
*e
) {
        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */
504 return e
->original_pid
!= getpid_cached();
507 static void source_io_unregister(sd_event_source
*s
) {
511 assert(s
->type
== SOURCE_IO
);
513 if (event_pid_changed(s
->event
))
516 if (!s
->io
.registered
)
519 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, s
->io
.fd
, NULL
);
521 log_debug_errno(errno
, "Failed to remove source %s (type %s) from epoll: %m",
522 strna(s
->description
), event_source_type_to_string(s
->type
));
524 s
->io
.registered
= false;
527 static int source_io_register(
532 struct epoll_event ev
= {};
536 assert(s
->type
== SOURCE_IO
);
537 assert(enabled
!= SD_EVENT_OFF
);
542 if (enabled
== SD_EVENT_ONESHOT
)
543 ev
.events
|= EPOLLONESHOT
;
545 if (s
->io
.registered
)
546 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_MOD
, s
->io
.fd
, &ev
);
548 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_ADD
, s
->io
.fd
, &ev
);
552 s
->io
.registered
= true;
557 static clockid_t
event_source_type_to_clock(EventSourceType t
) {
561 case SOURCE_TIME_REALTIME
:
562 return CLOCK_REALTIME
;
564 case SOURCE_TIME_BOOTTIME
:
565 return CLOCK_BOOTTIME
;
567 case SOURCE_TIME_MONOTONIC
:
568 return CLOCK_MONOTONIC
;
570 case SOURCE_TIME_REALTIME_ALARM
:
571 return CLOCK_REALTIME_ALARM
;
573 case SOURCE_TIME_BOOTTIME_ALARM
:
574 return CLOCK_BOOTTIME_ALARM
;
577 return (clockid_t
) -1;
581 static EventSourceType
clock_to_event_source_type(clockid_t clock
) {
586 return SOURCE_TIME_REALTIME
;
589 return SOURCE_TIME_BOOTTIME
;
591 case CLOCK_MONOTONIC
:
592 return SOURCE_TIME_MONOTONIC
;
594 case CLOCK_REALTIME_ALARM
:
595 return SOURCE_TIME_REALTIME_ALARM
;
597 case CLOCK_BOOTTIME_ALARM
:
598 return SOURCE_TIME_BOOTTIME_ALARM
;
601 return _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
605 static struct clock_data
* event_get_clock_data(sd_event
*e
, EventSourceType t
) {
610 case SOURCE_TIME_REALTIME
:
613 case SOURCE_TIME_BOOTTIME
:
616 case SOURCE_TIME_MONOTONIC
:
617 return &e
->monotonic
;
619 case SOURCE_TIME_REALTIME_ALARM
:
620 return &e
->realtime_alarm
;
622 case SOURCE_TIME_BOOTTIME_ALARM
:
623 return &e
->boottime_alarm
;
630 static int event_make_signal_data(
633 struct signal_data
**ret
) {
635 struct epoll_event ev
= {};
636 struct signal_data
*d
;
644 if (event_pid_changed(e
))
647 if (e
->signal_sources
&& e
->signal_sources
[sig
])
648 priority
= e
->signal_sources
[sig
]->priority
;
652 d
= hashmap_get(e
->signal_data
, &priority
);
654 if (sigismember(&d
->sigset
, sig
) > 0) {
660 r
= hashmap_ensure_allocated(&e
->signal_data
, &uint64_hash_ops
);
664 d
= new0(struct signal_data
, 1);
668 d
->wakeup
= WAKEUP_SIGNAL_DATA
;
670 d
->priority
= priority
;
672 r
= hashmap_put(e
->signal_data
, &d
->priority
, d
);
682 assert_se(sigaddset(&ss_copy
, sig
) >= 0);
684 r
= signalfd(d
->fd
, &ss_copy
, SFD_NONBLOCK
|SFD_CLOEXEC
);
703 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, d
->fd
, &ev
);
716 d
->fd
= safe_close(d
->fd
);
717 hashmap_remove(e
->signal_data
, &d
->priority
);
724 static void event_unmask_signal_data(sd_event
*e
, struct signal_data
*d
, int sig
) {
        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, we drop the whole signal data structure. */
732 if (sigismember(&d
->sigset
, sig
) == 0)
735 assert_se(sigdelset(&d
->sigset
, sig
) >= 0);
737 if (sigisemptyset(&d
->sigset
)) {
                /* If the mask is now all-zero we can get rid of the structure */
740 hashmap_remove(e
->signal_data
, &d
->priority
);
748 if (signalfd(d
->fd
, &d
->sigset
, SFD_NONBLOCK
|SFD_CLOEXEC
) < 0)
749 log_debug_errno(errno
, "Failed to unset signal bit, ignoring: %m");
752 static void event_gc_signal_data(sd_event
*e
, const int64_t *priority
, int sig
) {
753 struct signal_data
*d
;
754 static const int64_t zero_priority
= 0;
        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */
762 if (sig
== SIGCHLD
&&
763 e
->n_enabled_child_sources
> 0)
766 if (e
->signal_sources
&&
767 e
->signal_sources
[sig
] &&
768 e
->signal_sources
[sig
]->enabled
!= SD_EVENT_OFF
)
        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */
782 d
= hashmap_get(e
->signal_data
, priority
);
784 event_unmask_signal_data(e
, d
, sig
);
787 if (e
->signal_sources
&& e
->signal_sources
[sig
]) {
788 d
= hashmap_get(e
->signal_data
, &e
->signal_sources
[sig
]->priority
);
790 event_unmask_signal_data(e
, d
, sig
);
793 d
= hashmap_get(e
->signal_data
, &zero_priority
);
795 event_unmask_signal_data(e
, d
, sig
);
798 static void source_disconnect(sd_event_source
*s
) {
806 assert(s
->event
->n_sources
> 0);
812 source_io_unregister(s
);
816 case SOURCE_TIME_REALTIME
:
817 case SOURCE_TIME_BOOTTIME
:
818 case SOURCE_TIME_MONOTONIC
:
819 case SOURCE_TIME_REALTIME_ALARM
:
820 case SOURCE_TIME_BOOTTIME_ALARM
: {
821 struct clock_data
*d
;
823 d
= event_get_clock_data(s
->event
, s
->type
);
826 prioq_remove(d
->earliest
, s
, &s
->time
.earliest_index
);
827 prioq_remove(d
->latest
, s
, &s
->time
.latest_index
);
828 d
->needs_rearm
= true;
833 if (s
->signal
.sig
> 0) {
835 if (s
->event
->signal_sources
)
836 s
->event
->signal_sources
[s
->signal
.sig
] = NULL
;
838 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
844 if (s
->child
.pid
> 0) {
845 if (s
->enabled
!= SD_EVENT_OFF
) {
846 assert(s
->event
->n_enabled_child_sources
> 0);
847 s
->event
->n_enabled_child_sources
--;
850 (void) hashmap_remove(s
->event
->child_sources
, PID_TO_PTR(s
->child
.pid
));
851 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
861 set_remove(s
->event
->post_sources
, s
);
865 prioq_remove(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
869 assert_not_reached("Wut? I shouldn't exist.");
873 prioq_remove(s
->event
->pending
, s
, &s
->pending_index
);
876 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
880 s
->type
= _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
882 LIST_REMOVE(sources
, event
->sources
, s
);
886 sd_event_unref(event
);
889 static void source_free(sd_event_source
*s
) {
892 source_disconnect(s
);
894 if (s
->type
== SOURCE_IO
&& s
->io
.owned
)
895 safe_close(s
->io
.fd
);
897 free(s
->description
);
901 static int source_set_pending(sd_event_source
*s
, bool b
) {
905 assert(s
->type
!= SOURCE_EXIT
);
913 s
->pending_iteration
= s
->event
->iteration
;
915 r
= prioq_put(s
->event
->pending
, s
, &s
->pending_index
);
921 assert_se(prioq_remove(s
->event
->pending
, s
, &s
->pending_index
));
923 if (EVENT_SOURCE_IS_TIME(s
->type
)) {
924 struct clock_data
*d
;
926 d
= event_get_clock_data(s
->event
, s
->type
);
929 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
930 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
931 d
->needs_rearm
= true;
934 if (s
->type
== SOURCE_SIGNAL
&& !b
) {
935 struct signal_data
*d
;
937 d
= hashmap_get(s
->event
->signal_data
, &s
->priority
);
938 if (d
&& d
->current
== s
)
945 static sd_event_source
*source_new(sd_event
*e
, bool floating
, EventSourceType type
) {
950 s
= new0(sd_event_source
, 1);
956 s
->floating
= floating
;
958 s
->pending_index
= s
->prepare_index
= PRIOQ_IDX_NULL
;
963 LIST_PREPEND(sources
, e
->sources
, s
);
969 _public_
int sd_event_add_io(
971 sd_event_source
**ret
,
974 sd_event_io_handler_t callback
,
980 assert_return(e
, -EINVAL
);
981 assert_return(e
= event_resolve(e
), -ENOPKG
);
982 assert_return(fd
>= 0, -EBADF
);
983 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
984 assert_return(callback
, -EINVAL
);
985 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
986 assert_return(!event_pid_changed(e
), -ECHILD
);
988 s
= source_new(e
, !ret
, SOURCE_IO
);
992 s
->wakeup
= WAKEUP_EVENT_SOURCE
;
994 s
->io
.events
= events
;
995 s
->io
.callback
= callback
;
996 s
->userdata
= userdata
;
997 s
->enabled
= SD_EVENT_ON
;
999 r
= source_io_register(s
, s
->enabled
, events
);
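/* A rough usage sketch for sd_event_add_io() from client code (the fd and
 * the "on_io" handler below are hypothetical, not defined in this file):
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));   // drain the fd
 *             return n < 0 ? -errno : 0;                // a negative return disables the source
 *     }
 *
 *     sd_event *e = NULL;
 *     sd_event_source *src = NULL;
 *     (void) sd_event_default(&e);
 *     (void) sd_event_add_io(e, &src, fd, EPOLLIN, on_io, NULL);
 *
 * Passing NULL instead of &src makes the source "floating", i.e. owned by
 * the event loop itself (see the !ret argument to source_new() above). */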
1011 static void initialize_perturb(sd_event
*e
) {
1012 sd_id128_t bootid
= {};
        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */
1023 if (_likely_(e
->perturb
!= USEC_INFINITY
))
1026 if (sd_id128_get_boot(&bootid
) >= 0)
1027 e
->perturb
= (bootid
.qwords
[0] ^ bootid
.qwords
[1]) % USEC_PER_MINUTE
;
1030 static int event_setup_timer_fd(
1032 struct clock_data
*d
,
1035 struct epoll_event ev
= {};
1041 if (_likely_(d
->fd
>= 0))
1044 fd
= timerfd_create(clock
, TFD_NONBLOCK
|TFD_CLOEXEC
);
1048 ev
.events
= EPOLLIN
;
1051 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
);
1061 static int time_exit_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
1064 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1067 _public_
int sd_event_add_time(
1069 sd_event_source
**ret
,
1073 sd_event_time_handler_t callback
,
1076 EventSourceType type
;
1078 struct clock_data
*d
;
1081 assert_return(e
, -EINVAL
);
1082 assert_return(e
= event_resolve(e
), -ENOPKG
);
1083 assert_return(accuracy
!= (uint64_t) -1, -EINVAL
);
1084 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1085 assert_return(!event_pid_changed(e
), -ECHILD
);
1087 if (!clock_supported(clock
)) /* Checks whether the kernel supports the clock */
1090 type
= clock_to_event_source_type(clock
); /* checks whether sd-event supports this clock */
1095 callback
= time_exit_callback
;
1097 d
= event_get_clock_data(e
, type
);
1100 r
= prioq_ensure_allocated(&d
->earliest
, earliest_time_prioq_compare
);
1104 r
= prioq_ensure_allocated(&d
->latest
, latest_time_prioq_compare
);
1109 r
= event_setup_timer_fd(e
, d
, clock
);
1114 s
= source_new(e
, !ret
, type
);
1118 s
->time
.next
= usec
;
1119 s
->time
.accuracy
= accuracy
== 0 ? DEFAULT_ACCURACY_USEC
: accuracy
;
1120 s
->time
.callback
= callback
;
1121 s
->time
.earliest_index
= s
->time
.latest_index
= PRIOQ_IDX_NULL
;
1122 s
->userdata
= userdata
;
1123 s
->enabled
= SD_EVENT_ONESHOT
;
1125 d
->needs_rearm
= true;
1127 r
= prioq_put(d
->earliest
, s
, &s
->time
.earliest_index
);
1131 r
= prioq_put(d
->latest
, s
, &s
->time
.latest_index
);
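/* A rough usage sketch for sd_event_add_time() (the "on_timer" handler is
 * hypothetical): arm a one-shot timer five seconds from now on
 * CLOCK_MONOTONIC with 100ms accuracy, so the wakeup can be coalesced with
 * others:
 *
 *     uint64_t t;
 *     (void) sd_event_now(e, CLOCK_MONOTONIC, &t);
 *     (void) sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                              t + 5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                              on_timer, NULL);
 *
 * An accuracy of 0 selects DEFAULT_ACCURACY_USEC, and a NULL callback turns
 * the source into an exit trigger via time_exit_callback() above. */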
1145 static int signal_exit_callback(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1148 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1151 _public_
int sd_event_add_signal(
1153 sd_event_source
**ret
,
1155 sd_event_signal_handler_t callback
,
1159 struct signal_data
*d
;
1163 assert_return(e
, -EINVAL
);
1164 assert_return(e
= event_resolve(e
), -ENOPKG
);
1165 assert_return(SIGNAL_VALID(sig
), -EINVAL
);
1166 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1167 assert_return(!event_pid_changed(e
), -ECHILD
);
1170 callback
= signal_exit_callback
;
1172 r
= pthread_sigmask(SIG_SETMASK
, NULL
, &ss
);
1176 if (!sigismember(&ss
, sig
))
1179 if (!e
->signal_sources
) {
1180 e
->signal_sources
= new0(sd_event_source
*, _NSIG
);
1181 if (!e
->signal_sources
)
1183 } else if (e
->signal_sources
[sig
])
1186 s
= source_new(e
, !ret
, SOURCE_SIGNAL
);
1190 s
->signal
.sig
= sig
;
1191 s
->signal
.callback
= callback
;
1192 s
->userdata
= userdata
;
1193 s
->enabled
= SD_EVENT_ON
;
1195 e
->signal_sources
[sig
] = s
;
1197 r
= event_make_signal_data(e
, sig
, &d
);
1203 /* Use the signal name as description for the event source by default */
1204 (void) sd_event_source_set_description(s
, signal_to_string(sig
));
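/* A rough usage sketch for sd_event_add_signal() (the "on_sigterm" handler
 * is hypothetical). As checked via pthread_sigmask() above, the signal has
 * to be blocked in the calling thread before it can be added:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     (void) sigprocmask(SIG_BLOCK, &ss, NULL);
 *     (void) sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *
 * A NULL callback turns the source into an exit trigger via
 * signal_exit_callback() above. */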
1212 _public_
int sd_event_add_child(
1214 sd_event_source
**ret
,
1217 sd_event_child_handler_t callback
,
1223 assert_return(e
, -EINVAL
);
1224 assert_return(e
= event_resolve(e
), -ENOPKG
);
1225 assert_return(pid
> 1, -EINVAL
);
1226 assert_return(!(options
& ~(WEXITED
|WSTOPPED
|WCONTINUED
)), -EINVAL
);
1227 assert_return(options
!= 0, -EINVAL
);
1228 assert_return(callback
, -EINVAL
);
1229 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1230 assert_return(!event_pid_changed(e
), -ECHILD
);
1232 r
= hashmap_ensure_allocated(&e
->child_sources
, NULL
);
1236 if (hashmap_contains(e
->child_sources
, PID_TO_PTR(pid
)))
1239 s
= source_new(e
, !ret
, SOURCE_CHILD
);
1244 s
->child
.options
= options
;
1245 s
->child
.callback
= callback
;
1246 s
->userdata
= userdata
;
1247 s
->enabled
= SD_EVENT_ONESHOT
;
1249 r
= hashmap_put(e
->child_sources
, PID_TO_PTR(pid
), s
);
1255 e
->n_enabled_child_sources
++;
1257 r
= event_make_signal_data(e
, SIGCHLD
, NULL
);
1259 e
->n_enabled_child_sources
--;
1264 e
->need_process_child
= true;
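/* A rough usage sketch for sd_event_add_child() (the "on_child" handler is
 * hypothetical). Child handling is driven through the SIGCHLD signalfd set
 * up via event_make_signal_data() above, so SIGCHLD is expected to be
 * blocked in the calling thread, just as for sd_event_add_signal():
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);
 *     (void) sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 *
 * Each PID can only be watched by a single child source, see the
 * hashmap_contains() check above. */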
1272 _public_
int sd_event_add_defer(
1274 sd_event_source
**ret
,
1275 sd_event_handler_t callback
,
1281 assert_return(e
, -EINVAL
);
1282 assert_return(e
= event_resolve(e
), -ENOPKG
);
1283 assert_return(callback
, -EINVAL
);
1284 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1285 assert_return(!event_pid_changed(e
), -ECHILD
);
1287 s
= source_new(e
, !ret
, SOURCE_DEFER
);
1291 s
->defer
.callback
= callback
;
1292 s
->userdata
= userdata
;
1293 s
->enabled
= SD_EVENT_ONESHOT
;
1295 r
= source_set_pending(s
, true);
1307 _public_
int sd_event_add_post(
1309 sd_event_source
**ret
,
1310 sd_event_handler_t callback
,
1316 assert_return(e
, -EINVAL
);
1317 assert_return(e
= event_resolve(e
), -ENOPKG
);
1318 assert_return(callback
, -EINVAL
);
1319 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1320 assert_return(!event_pid_changed(e
), -ECHILD
);
1322 r
= set_ensure_allocated(&e
->post_sources
, NULL
);
1326 s
= source_new(e
, !ret
, SOURCE_POST
);
1330 s
->post
.callback
= callback
;
1331 s
->userdata
= userdata
;
1332 s
->enabled
= SD_EVENT_ON
;
1334 r
= set_put(e
->post_sources
, s
);
1346 _public_
int sd_event_add_exit(
1348 sd_event_source
**ret
,
1349 sd_event_handler_t callback
,
1355 assert_return(e
, -EINVAL
);
1356 assert_return(e
= event_resolve(e
), -ENOPKG
);
1357 assert_return(callback
, -EINVAL
);
1358 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1359 assert_return(!event_pid_changed(e
), -ECHILD
);
1361 r
= prioq_ensure_allocated(&e
->exit
, exit_prioq_compare
);
1365 s
= source_new(e
, !ret
, SOURCE_EXIT
);
1369 s
->exit
.callback
= callback
;
1370 s
->userdata
= userdata
;
1371 s
->exit
.prioq_index
= PRIOQ_IDX_NULL
;
1372 s
->enabled
= SD_EVENT_ONESHOT
;
1374 r
= prioq_put(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1386 _public_ sd_event_source
* sd_event_source_ref(sd_event_source
*s
) {
1391 assert(s
->n_ref
>= 1);
1397 _public_ sd_event_source
* sd_event_source_unref(sd_event_source
*s
) {
1402 assert(s
->n_ref
>= 1);
1405 if (s
->n_ref
<= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */
1414 if (s
->dispatching
) {
1415 if (s
->type
== SOURCE_IO
)
1416 source_io_unregister(s
);
1418 source_disconnect(s
);
1426 _public_
int sd_event_source_set_description(sd_event_source
*s
, const char *description
) {
1427 assert_return(s
, -EINVAL
);
1428 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1430 return free_and_strdup(&s
->description
, description
);
1433 _public_
int sd_event_source_get_description(sd_event_source
*s
, const char **description
) {
1434 assert_return(s
, -EINVAL
);
1435 assert_return(description
, -EINVAL
);
1436 assert_return(s
->description
, -ENXIO
);
1437 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1439 *description
= s
->description
;
1443 _public_ sd_event
*sd_event_source_get_event(sd_event_source
*s
) {
1444 assert_return(s
, NULL
);
1449 _public_
int sd_event_source_get_pending(sd_event_source
*s
) {
1450 assert_return(s
, -EINVAL
);
1451 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1452 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1453 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1458 _public_
int sd_event_source_get_io_fd(sd_event_source
*s
) {
1459 assert_return(s
, -EINVAL
);
1460 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1461 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1466 _public_
int sd_event_source_set_io_fd(sd_event_source
*s
, int fd
) {
1469 assert_return(s
, -EINVAL
);
1470 assert_return(fd
>= 0, -EBADF
);
1471 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1472 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1477 if (s
->enabled
== SD_EVENT_OFF
) {
1479 s
->io
.registered
= false;
1483 saved_fd
= s
->io
.fd
;
1484 assert(s
->io
.registered
);
1487 s
->io
.registered
= false;
1489 r
= source_io_register(s
, s
->enabled
, s
->io
.events
);
1491 s
->io
.fd
= saved_fd
;
1492 s
->io
.registered
= true;
1496 epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, saved_fd
, NULL
);
1502 _public_
int sd_event_source_get_io_fd_own(sd_event_source
*s
) {
1503 assert_return(s
, -EINVAL
);
1504 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1509 _public_
int sd_event_source_set_io_fd_own(sd_event_source
*s
, int own
) {
1510 assert_return(s
, -EINVAL
);
1511 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1517 _public_
int sd_event_source_get_io_events(sd_event_source
*s
, uint32_t* events
) {
1518 assert_return(s
, -EINVAL
);
1519 assert_return(events
, -EINVAL
);
1520 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1521 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1523 *events
= s
->io
.events
;
1527 _public_
int sd_event_source_set_io_events(sd_event_source
*s
, uint32_t events
) {
1530 assert_return(s
, -EINVAL
);
1531 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1532 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
1533 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1534 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1536 /* edge-triggered updates are never skipped, so we can reset edges */
1537 if (s
->io
.events
== events
&& !(events
& EPOLLET
))
1540 if (s
->enabled
!= SD_EVENT_OFF
) {
1541 r
= source_io_register(s
, s
->enabled
, events
);
1546 s
->io
.events
= events
;
1547 source_set_pending(s
, false);
1552 _public_
int sd_event_source_get_io_revents(sd_event_source
*s
, uint32_t* revents
) {
1553 assert_return(s
, -EINVAL
);
1554 assert_return(revents
, -EINVAL
);
1555 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1556 assert_return(s
->pending
, -ENODATA
);
1557 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1559 *revents
= s
->io
.revents
;
1563 _public_
int sd_event_source_get_signal(sd_event_source
*s
) {
1564 assert_return(s
, -EINVAL
);
1565 assert_return(s
->type
== SOURCE_SIGNAL
, -EDOM
);
1566 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1568 return s
->signal
.sig
;
1571 _public_
int sd_event_source_get_priority(sd_event_source
*s
, int64_t *priority
) {
1572 assert_return(s
, -EINVAL
);
1573 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1575 *priority
= s
->priority
;
1579 _public_
int sd_event_source_set_priority(sd_event_source
*s
, int64_t priority
) {
1582 assert_return(s
, -EINVAL
);
1583 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1584 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1586 if (s
->priority
== priority
)
1589 if (s
->type
== SOURCE_SIGNAL
&& s
->enabled
!= SD_EVENT_OFF
) {
1590 struct signal_data
*old
, *d
;
                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */
1595 assert_se(old
= hashmap_get(s
->event
->signal_data
, &s
->priority
));
1597 s
->priority
= priority
;
1599 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, &d
);
1601 s
->priority
= old
->priority
;
1605 event_unmask_signal_data(s
->event
, old
, s
->signal
.sig
);
1607 s
->priority
= priority
;
1610 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1613 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1615 if (s
->type
== SOURCE_EXIT
)
1616 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
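/* Lower numerical priority values are dispatched first (see
 * pending_prioq_compare() above); SD_EVENT_PRIORITY_NORMAL is 0. A sketch,
 * assuming "src" is an existing event source:
 *
 *     (void) sd_event_source_set_priority(src, SD_EVENT_PRIORITY_IMPORTANT);
 *
 * For enabled signal sources this also migrates the source between the
 * per-priority signalfds, as implemented above. */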
1621 _public_
int sd_event_source_get_enabled(sd_event_source
*s
, int *m
) {
1622 assert_return(s
, -EINVAL
);
1623 assert_return(m
, -EINVAL
);
1624 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1630 _public_
int sd_event_source_set_enabled(sd_event_source
*s
, int m
) {
1633 assert_return(s
, -EINVAL
);
1634 assert_return(IN_SET(m
, SD_EVENT_OFF
, SD_EVENT_ON
, SD_EVENT_ONESHOT
), -EINVAL
);
1635 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
1639 if (s
->event
->state
== SD_EVENT_FINISHED
)
1640 return m
== SD_EVENT_OFF
? 0 : -ESTALE
;
1642 if (s
->enabled
== m
)
1645 if (m
== SD_EVENT_OFF
) {
1650 source_io_unregister(s
);
1654 case SOURCE_TIME_REALTIME
:
1655 case SOURCE_TIME_BOOTTIME
:
1656 case SOURCE_TIME_MONOTONIC
:
1657 case SOURCE_TIME_REALTIME_ALARM
:
1658 case SOURCE_TIME_BOOTTIME_ALARM
: {
1659 struct clock_data
*d
;
1662 d
= event_get_clock_data(s
->event
, s
->type
);
1665 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1666 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1667 d
->needs_rearm
= true;
1674 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1680 assert(s
->event
->n_enabled_child_sources
> 0);
1681 s
->event
->n_enabled_child_sources
--;
1683 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1688 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1697 assert_not_reached("Wut? I shouldn't exist.");
1704 r
= source_io_register(s
, m
, s
->io
.events
);
1711 case SOURCE_TIME_REALTIME
:
1712 case SOURCE_TIME_BOOTTIME
:
1713 case SOURCE_TIME_MONOTONIC
:
1714 case SOURCE_TIME_REALTIME_ALARM
:
1715 case SOURCE_TIME_BOOTTIME_ALARM
: {
1716 struct clock_data
*d
;
1719 d
= event_get_clock_data(s
->event
, s
->type
);
1722 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1723 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1724 d
->needs_rearm
= true;
1732 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, NULL
);
1734 s
->enabled
= SD_EVENT_OFF
;
1735 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1743 if (s
->enabled
== SD_EVENT_OFF
)
1744 s
->event
->n_enabled_child_sources
++;
1748 r
= event_make_signal_data(s
->event
, SIGCHLD
, NULL
);
1750 s
->enabled
= SD_EVENT_OFF
;
1751 s
->event
->n_enabled_child_sources
--;
1752 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1760 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1769 assert_not_reached("Wut? I shouldn't exist.");
1774 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1777 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1782 _public_
int sd_event_source_get_time(sd_event_source
*s
, uint64_t *usec
) {
1783 assert_return(s
, -EINVAL
);
1784 assert_return(usec
, -EINVAL
);
1785 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1786 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1788 *usec
= s
->time
.next
;
1792 _public_
int sd_event_source_set_time(sd_event_source
*s
, uint64_t usec
) {
1793 struct clock_data
*d
;
1795 assert_return(s
, -EINVAL
);
1796 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1797 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1798 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1800 s
->time
.next
= usec
;
1802 source_set_pending(s
, false);
1804 d
= event_get_clock_data(s
->event
, s
->type
);
1807 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1808 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1809 d
->needs_rearm
= true;
1814 _public_
int sd_event_source_get_time_accuracy(sd_event_source
*s
, uint64_t *usec
) {
1815 assert_return(s
, -EINVAL
);
1816 assert_return(usec
, -EINVAL
);
1817 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1818 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1820 *usec
= s
->time
.accuracy
;
1824 _public_
int sd_event_source_set_time_accuracy(sd_event_source
*s
, uint64_t usec
) {
1825 struct clock_data
*d
;
1827 assert_return(s
, -EINVAL
);
1828 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1829 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1830 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1831 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1834 usec
= DEFAULT_ACCURACY_USEC
;
1836 s
->time
.accuracy
= usec
;
1838 source_set_pending(s
, false);
1840 d
= event_get_clock_data(s
->event
, s
->type
);
1843 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1844 d
->needs_rearm
= true;
1849 _public_
int sd_event_source_get_time_clock(sd_event_source
*s
, clockid_t
*clock
) {
1850 assert_return(s
, -EINVAL
);
1851 assert_return(clock
, -EINVAL
);
1852 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1853 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1855 *clock
= event_source_type_to_clock(s
->type
);
1859 _public_
int sd_event_source_get_child_pid(sd_event_source
*s
, pid_t
*pid
) {
1860 assert_return(s
, -EINVAL
);
1861 assert_return(pid
, -EINVAL
);
1862 assert_return(s
->type
== SOURCE_CHILD
, -EDOM
);
1863 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1865 *pid
= s
->child
.pid
;
1869 _public_
int sd_event_source_set_prepare(sd_event_source
*s
, sd_event_handler_t callback
) {
1872 assert_return(s
, -EINVAL
);
1873 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1874 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1875 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1877 if (s
->prepare
== callback
)
1880 if (callback
&& s
->prepare
) {
1881 s
->prepare
= callback
;
1885 r
= prioq_ensure_allocated(&s
->event
->prepare
, prepare_prioq_compare
);
1889 s
->prepare
= callback
;
1892 r
= prioq_put(s
->event
->prepare
, s
, &s
->prepare_index
);
1896 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
1901 _public_
void* sd_event_source_get_userdata(sd_event_source
*s
) {
1902 assert_return(s
, NULL
);
1907 _public_
void *sd_event_source_set_userdata(sd_event_source
*s
, void *userdata
) {
1910 assert_return(s
, NULL
);
1913 s
->userdata
= userdata
;
1918 static usec_t
sleep_between(sd_event
*e
, usec_t a
, usec_t b
) {
1925 if (a
>= USEC_INFINITY
)
1926 return USEC_INFINITY
;
1931 initialize_perturb(e
);
        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */
1951 c
= (b
/ USEC_PER_MINUTE
) * USEC_PER_MINUTE
+ e
->perturb
;
1953 if (_unlikely_(c
< USEC_PER_MINUTE
))
1956 c
-= USEC_PER_MINUTE
;
1962 c
= (b
/ (USEC_PER_SEC
*10)) * (USEC_PER_SEC
*10) + (e
->perturb
% (USEC_PER_SEC
*10));
1964 if (_unlikely_(c
< USEC_PER_SEC
*10))
1967 c
-= USEC_PER_SEC
*10;
1973 c
= (b
/ USEC_PER_SEC
) * USEC_PER_SEC
+ (e
->perturb
% USEC_PER_SEC
);
1975 if (_unlikely_(c
< USEC_PER_SEC
))
1984 c
= (b
/ (USEC_PER_MSEC
*250)) * (USEC_PER_MSEC
*250) + (e
->perturb
% (USEC_PER_MSEC
*250));
1986 if (_unlikely_(c
< USEC_PER_MSEC
*250))
1989 c
-= USEC_PER_MSEC
*250;
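/* A rough worked example of the coalescing above, with times written in
 * seconds for readability and assuming a perturbation of 17s: for a = 100s
 * and b = 130s, the per-minute candidate is (130/60)*60 + 17 = 137s, which
 * lies past b, so one minute is subtracted, giving 77s, which is before a;
 * the 10s-step candidate is (130/10)*10 + (17 % 10) = 137s, again past b,
 * minus 10s = 127s, which falls inside [a, b] and is used as the wakeup
 * time. */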
1998 static int event_arm_timer(
2000 struct clock_data
*d
) {
2002 struct itimerspec its
= {};
2003 sd_event_source
*a
, *b
;
2010 if (!d
->needs_rearm
)
2013 d
->needs_rearm
= false;
2015 a
= prioq_peek(d
->earliest
);
2016 if (!a
|| a
->enabled
== SD_EVENT_OFF
|| a
->time
.next
== USEC_INFINITY
) {
2021 if (d
->next
== USEC_INFINITY
)
2025 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2029 d
->next
= USEC_INFINITY
;
2033 b
= prioq_peek(d
->latest
);
2034 assert_se(b
&& b
->enabled
!= SD_EVENT_OFF
);
2036 t
= sleep_between(e
, a
->time
.next
, time_event_source_latest(b
));
2040 assert_se(d
->fd
>= 0);
        /* We don't want to disarm here, just mean some time looooong ago. */
2044 its
.it_value
.tv_sec
= 0;
2045 its
.it_value
.tv_nsec
= 1;
2047 timespec_store(&its
.it_value
, t
);
2049 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2057 static int process_io(sd_event
*e
, sd_event_source
*s
, uint32_t revents
) {
2060 assert(s
->type
== SOURCE_IO
);
        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */
2069 s
->io
.revents
|= revents
;
2071 s
->io
.revents
= revents
;
2073 return source_set_pending(s
, true);
2076 static int flush_timer(sd_event
*e
, int fd
, uint32_t events
, usec_t
*next
) {
2083 assert_return(events
== EPOLLIN
, -EIO
);
2085 ss
= read(fd
, &x
, sizeof(x
));
2087 if (IN_SET(errno
, EAGAIN
, EINTR
))
2093 if (_unlikely_(ss
!= sizeof(x
)))
2097 *next
= USEC_INFINITY
;
2102 static int process_timer(
2105 struct clock_data
*d
) {
2114 s
= prioq_peek(d
->earliest
);
2117 s
->enabled
== SD_EVENT_OFF
||
2121 r
= source_set_pending(s
, true);
2125 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
2126 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2127 d
->needs_rearm
= true;
2133 static int process_child(sd_event
*e
) {
2140 e
->need_process_child
= false;
        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */
2160 HASHMAP_FOREACH(s
, e
->child_sources
, i
) {
2161 assert(s
->type
== SOURCE_CHILD
);
2166 if (s
->enabled
== SD_EVENT_OFF
)
2169 zero(s
->child
.siginfo
);
2170 r
= waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
,
2171 WNOHANG
| (s
->child
.options
& WEXITED
? WNOWAIT
: 0) | s
->child
.options
);
2175 if (s
->child
.siginfo
.si_pid
!= 0) {
2176 bool zombie
= IN_SET(s
->child
.siginfo
.si_code
, CLD_EXITED
, CLD_KILLED
, CLD_DUMPED
);
2178 if (!zombie
&& (s
->child
.options
& WEXITED
)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */
2184 assert(s
->child
.options
& (WSTOPPED
|WCONTINUED
));
2185 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|(s
->child
.options
& (WSTOPPED
|WCONTINUED
)));
2188 r
= source_set_pending(s
, true);
2197 static int process_signal(sd_event
*e
, struct signal_data
*d
, uint32_t events
) {
2198 bool read_one
= false;
2202 assert_return(events
== EPOLLIN
, -EIO
);
        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeued one and
           SIGCHLD was enqueued later we wouldn't know, but we might
           have higher priority children we care about, hence we
           need to check that explicitly. */
2212 if (sigismember(&d
->sigset
, SIGCHLD
))
2213 e
->need_process_child
= true;
        /* If there's already an event source pending for this
         * priority we don't read another */
2221 struct signalfd_siginfo si
;
2223 sd_event_source
*s
= NULL
;
2225 n
= read(d
->fd
, &si
, sizeof(si
));
2227 if (IN_SET(errno
, EAGAIN
, EINTR
))
2233 if (_unlikely_(n
!= sizeof(si
)))
2236 assert(SIGNAL_VALID(si
.ssi_signo
));
2240 if (e
->signal_sources
)
2241 s
= e
->signal_sources
[si
.ssi_signo
];
2247 s
->signal
.siginfo
= si
;
2250 r
= source_set_pending(s
, true);
2258 static int source_dispatch(sd_event_source
*s
) {
2259 EventSourceType saved_type
;
2263 assert(s
->pending
|| s
->type
== SOURCE_EXIT
);
        /* Save the event source type, here, so that we still know it after the event callback which might
         * invalidate the event. */
2267 saved_type
= s
->type
;
2269 if (!IN_SET(s
->type
, SOURCE_DEFER
, SOURCE_EXIT
)) {
2270 r
= source_set_pending(s
, false);
2275 if (s
->type
!= SOURCE_POST
) {
                /* If we execute a non-post source, let's mark all
                 * post sources as pending */
2282 SET_FOREACH(z
, s
->event
->post_sources
, i
) {
2283 if (z
->enabled
== SD_EVENT_OFF
)
2286 r
= source_set_pending(z
, true);
2292 if (s
->enabled
== SD_EVENT_ONESHOT
) {
2293 r
= sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2298 s
->dispatching
= true;
2303 r
= s
->io
.callback(s
, s
->io
.fd
, s
->io
.revents
, s
->userdata
);
2306 case SOURCE_TIME_REALTIME
:
2307 case SOURCE_TIME_BOOTTIME
:
2308 case SOURCE_TIME_MONOTONIC
:
2309 case SOURCE_TIME_REALTIME_ALARM
:
2310 case SOURCE_TIME_BOOTTIME_ALARM
:
2311 r
= s
->time
.callback(s
, s
->time
.next
, s
->userdata
);
2315 r
= s
->signal
.callback(s
, &s
->signal
.siginfo
, s
->userdata
);
2318 case SOURCE_CHILD
: {
2321 zombie
= IN_SET(s
->child
.siginfo
.si_code
, CLD_EXITED
, CLD_KILLED
, CLD_DUMPED
);
2323 r
= s
->child
.callback(s
, &s
->child
.siginfo
, s
->userdata
);
2325 /* Now, reap the PID for good. */
2327 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|WEXITED
);
2333 r
= s
->defer
.callback(s
, s
->userdata
);
2337 r
= s
->post
.callback(s
, s
->userdata
);
2341 r
= s
->exit
.callback(s
, s
->userdata
);
2344 case SOURCE_WATCHDOG
:
2345 case _SOURCE_EVENT_SOURCE_TYPE_MAX
:
2346 case _SOURCE_EVENT_SOURCE_TYPE_INVALID
:
2347 assert_not_reached("Wut? I shouldn't exist.");
2350 s
->dispatching
= false;
2353 log_debug_errno(r
, "Event source %s (type %s) returned error, disabling: %m",
2354 strna(s
->description
), event_source_type_to_string(saved_type
));
2359 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2364 static int event_prepare(sd_event
*e
) {
2372 s
= prioq_peek(e
->prepare
);
2373 if (!s
|| s
->prepare_iteration
== e
->iteration
|| s
->enabled
== SD_EVENT_OFF
)
2376 s
->prepare_iteration
= e
->iteration
;
2377 r
= prioq_reshuffle(e
->prepare
, s
, &s
->prepare_index
);
2383 s
->dispatching
= true;
2384 r
= s
->prepare(s
, s
->userdata
);
2385 s
->dispatching
= false;
2388 log_debug_errno(r
, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2389 strna(s
->description
), event_source_type_to_string(s
->type
));
2394 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2400 static int dispatch_exit(sd_event
*e
) {
2406 p
= prioq_peek(e
->exit
);
2407 if (!p
|| p
->enabled
== SD_EVENT_OFF
) {
2408 e
->state
= SD_EVENT_FINISHED
;
2414 e
->state
= SD_EVENT_EXITING
;
2416 r
= source_dispatch(p
);
2418 e
->state
= SD_EVENT_INITIAL
;
2424 static sd_event_source
* event_next_pending(sd_event
*e
) {
2429 p
= prioq_peek(e
->pending
);
2433 if (p
->enabled
== SD_EVENT_OFF
)
2439 static int arm_watchdog(sd_event
*e
) {
2440 struct itimerspec its
= {};
2445 assert(e
->watchdog_fd
>= 0);
2447 t
= sleep_between(e
,
2448 e
->watchdog_last
+ (e
->watchdog_period
/ 2),
2449 e
->watchdog_last
+ (e
->watchdog_period
* 3 / 4));
2451 timespec_store(&its
.it_value
, t
);
        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
2455 if (its
.it_value
.tv_sec
== 0 && its
.it_value
.tv_nsec
== 0)
2456 its
.it_value
.tv_nsec
= 1;
2458 r
= timerfd_settime(e
->watchdog_fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2465 static int process_watchdog(sd_event
*e
) {
2471 /* Don't notify watchdog too often */
2472 if (e
->watchdog_last
+ e
->watchdog_period
/ 4 > e
->timestamp
.monotonic
)
2475 sd_notify(false, "WATCHDOG=1");
2476 e
->watchdog_last
= e
->timestamp
.monotonic
;
2478 return arm_watchdog(e
);
2481 _public_
int sd_event_prepare(sd_event
*e
) {
2484 assert_return(e
, -EINVAL
);
2485 assert_return(e
= event_resolve(e
), -ENOPKG
);
2486 assert_return(!event_pid_changed(e
), -ECHILD
);
2487 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2488 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2490 if (e
->exit_requested
)
2495 e
->state
= SD_EVENT_PREPARING
;
2496 r
= event_prepare(e
);
2497 e
->state
= SD_EVENT_INITIAL
;
2501 r
= event_arm_timer(e
, &e
->realtime
);
2505 r
= event_arm_timer(e
, &e
->boottime
);
2509 r
= event_arm_timer(e
, &e
->monotonic
);
2513 r
= event_arm_timer(e
, &e
->realtime_alarm
);
2517 r
= event_arm_timer(e
, &e
->boottime_alarm
);
2521 if (event_next_pending(e
) || e
->need_process_child
)
2524 e
->state
= SD_EVENT_ARMED
;
2529 e
->state
= SD_EVENT_ARMED
;
2530 r
= sd_event_wait(e
, 0);
2532 e
->state
= SD_EVENT_ARMED
;
2537 _public_
int sd_event_wait(sd_event
*e
, uint64_t timeout
) {
2538 struct epoll_event
*ev_queue
;
2539 unsigned ev_queue_max
;
2542 assert_return(e
, -EINVAL
);
2543 assert_return(e
= event_resolve(e
), -ENOPKG
);
2544 assert_return(!event_pid_changed(e
), -ECHILD
);
2545 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2546 assert_return(e
->state
== SD_EVENT_ARMED
, -EBUSY
);
2548 if (e
->exit_requested
) {
2549 e
->state
= SD_EVENT_PENDING
;
2553 ev_queue_max
= MAX(e
->n_sources
, 1u);
2554 ev_queue
= newa(struct epoll_event
, ev_queue_max
);
2556 m
= epoll_wait(e
->epoll_fd
, ev_queue
, ev_queue_max
,
2557 timeout
== (uint64_t) -1 ? -1 : (int) ((timeout
+ USEC_PER_MSEC
- 1) / USEC_PER_MSEC
));
2559 if (errno
== EINTR
) {
2560 e
->state
= SD_EVENT_PENDING
;
2568 triple_timestamp_get(&e
->timestamp
);
2570 for (i
= 0; i
< m
; i
++) {
2572 if (ev_queue
[i
].data
.ptr
== INT_TO_PTR(SOURCE_WATCHDOG
))
2573 r
= flush_timer(e
, e
->watchdog_fd
, ev_queue
[i
].events
, NULL
);
2575 WakeupType
*t
= ev_queue
[i
].data
.ptr
;
2579 case WAKEUP_EVENT_SOURCE
:
2580 r
= process_io(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
2583 case WAKEUP_CLOCK_DATA
: {
2584 struct clock_data
*d
= ev_queue
[i
].data
.ptr
;
2585 r
= flush_timer(e
, d
->fd
, ev_queue
[i
].events
, &d
->next
);
2589 case WAKEUP_SIGNAL_DATA
:
2590 r
= process_signal(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
2594 assert_not_reached("Invalid wake-up pointer");
2601 r
= process_watchdog(e
);
2605 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime
);
2609 r
= process_timer(e
, e
->timestamp
.boottime
, &e
->boottime
);
2613 r
= process_timer(e
, e
->timestamp
.monotonic
, &e
->monotonic
);
2617 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime_alarm
);
2621 r
= process_timer(e
, e
->timestamp
.boottime
, &e
->boottime_alarm
);
2625 if (e
->need_process_child
) {
2626 r
= process_child(e
);
2631 if (event_next_pending(e
)) {
2632 e
->state
= SD_EVENT_PENDING
;
2640 e
->state
= SD_EVENT_INITIAL
;
2645 _public_
int sd_event_dispatch(sd_event
*e
) {
2649 assert_return(e
, -EINVAL
);
2650 assert_return(e
= event_resolve(e
), -ENOPKG
);
2651 assert_return(!event_pid_changed(e
), -ECHILD
);
2652 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2653 assert_return(e
->state
== SD_EVENT_PENDING
, -EBUSY
);
2655 if (e
->exit_requested
)
2656 return dispatch_exit(e
);
2658 p
= event_next_pending(e
);
2662 e
->state
= SD_EVENT_RUNNING
;
2663 r
= source_dispatch(p
);
2664 e
->state
= SD_EVENT_INITIAL
;
2671 e
->state
= SD_EVENT_INITIAL
;
2676 static void event_log_delays(sd_event
*e
) {
2677 char b
[ELEMENTSOF(e
->delays
) * DECIMAL_STR_MAX(unsigned) + 1];
2681 for (i
= o
= 0; i
< ELEMENTSOF(e
->delays
); i
++) {
2682 o
+= snprintf(&b
[o
], sizeof(b
) - o
, "%u ", e
->delays
[i
]);
2685 log_debug("Event loop iterations: %.*s", o
, b
);
2688 _public_
int sd_event_run(sd_event
*e
, uint64_t timeout
) {
2691 assert_return(e
, -EINVAL
);
2692 assert_return(e
= event_resolve(e
), -ENOPKG
);
2693 assert_return(!event_pid_changed(e
), -ECHILD
);
2694 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2695 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2697 if (e
->profile_delays
&& e
->last_run
) {
2701 this_run
= now(CLOCK_MONOTONIC
);
2703 l
= u64log2(this_run
- e
->last_run
);
2704 assert(l
< sizeof(e
->delays
));
2707 if (this_run
- e
->last_log
>= 5*USEC_PER_SEC
) {
2708 event_log_delays(e
);
2709 e
->last_log
= this_run
;
2713 r
= sd_event_prepare(e
);
2715 /* There was nothing? Then wait... */
2716 r
= sd_event_wait(e
, timeout
);
2718 if (e
->profile_delays
)
2719 e
->last_run
= now(CLOCK_MONOTONIC
);
2722 /* There's something now, then let's dispatch it */
2723 r
= sd_event_dispatch(e
);
2733 _public_
int sd_event_loop(sd_event
*e
) {
2736 assert_return(e
, -EINVAL
);
2737 assert_return(e
= event_resolve(e
), -ENOPKG
);
2738 assert_return(!event_pid_changed(e
), -ECHILD
);
2739 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2743 while (e
->state
!= SD_EVENT_FINISHED
) {
2744 r
= sd_event_run(e
, (uint64_t) -1);
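/* Putting the pieces together, a minimal client main loop roughly looks
 * like this (error handling omitted):
 *
 *     sd_event *e = NULL;
 *     (void) sd_event_default(&e);
 *     // add io/time/signal/child sources here
 *     (void) sd_event_loop(e);      // prepare/wait/dispatch until sd_event_exit()
 *     sd_event_unref(e);
 *
 * Alternatively, sd_event_get_fd() below allows embedding the loop into a
 * foreign poll/epoll loop: wait for the returned fd to become readable and
 * then call sd_event_run(e, 0) to process whatever is pending. */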
2756 _public_
int sd_event_get_fd(sd_event
*e
) {
2758 assert_return(e
, -EINVAL
);
2759 assert_return(e
= event_resolve(e
), -ENOPKG
);
2760 assert_return(!event_pid_changed(e
), -ECHILD
);
2765 _public_
int sd_event_get_state(sd_event
*e
) {
2766 assert_return(e
, -EINVAL
);
2767 assert_return(e
= event_resolve(e
), -ENOPKG
);
2768 assert_return(!event_pid_changed(e
), -ECHILD
);
2773 _public_
int sd_event_get_exit_code(sd_event
*e
, int *code
) {
2774 assert_return(e
, -EINVAL
);
2775 assert_return(e
= event_resolve(e
), -ENOPKG
);
2776 assert_return(code
, -EINVAL
);
2777 assert_return(!event_pid_changed(e
), -ECHILD
);
2779 if (!e
->exit_requested
)
2782 *code
= e
->exit_code
;
2786 _public_
int sd_event_exit(sd_event
*e
, int code
) {
2787 assert_return(e
, -EINVAL
);
2788 assert_return(e
= event_resolve(e
), -ENOPKG
);
2789 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2790 assert_return(!event_pid_changed(e
), -ECHILD
);
2792 e
->exit_requested
= true;
2793 e
->exit_code
= code
;
2798 _public_
int sd_event_now(sd_event
*e
, clockid_t clock
, uint64_t *usec
) {
2799 assert_return(e
, -EINVAL
);
2800 assert_return(e
= event_resolve(e
), -ENOPKG
);
2801 assert_return(usec
, -EINVAL
);
2802 assert_return(!event_pid_changed(e
), -ECHILD
);
2804 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock
))
        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
2810 if (IN_SET(clock
, CLOCK_BOOTTIME
, CLOCK_BOOTTIME_ALARM
) && !clock_boottime_supported())
2813 if (!triple_timestamp_is_set(&e
->timestamp
)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
2820 *usec
= triple_timestamp_by_clock(&e
->timestamp
, clock
);
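/* Note that sd_event_now() returns the timestamp cached at the start of the
 * current loop iteration (see triple_timestamp_get() in sd_event_wait()),
 * not a fresh clock read, so repeated calls within one dispatch observe one
 * consistent "now". A rough sketch for scheduling a relative timeout,
 * assuming "timer_src" is an existing time event source:
 *
 *     uint64_t t;
 *     (void) sd_event_now(e, CLOCK_MONOTONIC, &t);
 *     (void) sd_event_source_set_time(timer_src, t + 30 * USEC_PER_SEC);
 */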
2824 _public_
int sd_event_default(sd_event
**ret
) {
2829 return !!default_event
;
2831 if (default_event
) {
2832 *ret
= sd_event_ref(default_event
);
2836 r
= sd_event_new(&e
);
2840 e
->default_event_ptr
= &default_event
;
2848 _public_
int sd_event_get_tid(sd_event
*e
, pid_t
*tid
) {
2849 assert_return(e
, -EINVAL
);
2850 assert_return(e
= event_resolve(e
), -ENOPKG
);
2851 assert_return(tid
, -EINVAL
);
2852 assert_return(!event_pid_changed(e
), -ECHILD
);
2862 _public_
int sd_event_set_watchdog(sd_event
*e
, int b
) {
2865 assert_return(e
, -EINVAL
);
2866 assert_return(e
= event_resolve(e
), -ENOPKG
);
2867 assert_return(!event_pid_changed(e
), -ECHILD
);
2869 if (e
->watchdog
== !!b
)
2873 struct epoll_event ev
= {};
2875 r
= sd_watchdog_enabled(false, &e
->watchdog_period
);
2879 /* Issue first ping immediately */
2880 sd_notify(false, "WATCHDOG=1");
2881 e
->watchdog_last
= now(CLOCK_MONOTONIC
);
2883 e
->watchdog_fd
= timerfd_create(CLOCK_MONOTONIC
, TFD_NONBLOCK
|TFD_CLOEXEC
);
2884 if (e
->watchdog_fd
< 0)
2887 r
= arm_watchdog(e
);
2891 ev
.events
= EPOLLIN
;
2892 ev
.data
.ptr
= INT_TO_PTR(SOURCE_WATCHDOG
);
2894 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, e
->watchdog_fd
, &ev
);
2901 if (e
->watchdog_fd
>= 0) {
2902 epoll_ctl(e
->epoll_fd
, EPOLL_CTL_DEL
, e
->watchdog_fd
, NULL
);
2903 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
2911 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
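/* A rough usage sketch for the watchdog integration: a service started with
 * WatchdogSec= set simply enables it and runs its loop:
 *
 *     (void) sd_event_set_watchdog(e, true);   // no effect if the manager requested no watchdog
 *     (void) sd_event_loop(e);
 *
 * While the loop runs, arm_watchdog() and process_watchdog() above keep
 * pinging the service manager with sd_notify("WATCHDOG=1") at roughly half
 * to three quarters of the period reported by sd_watchdog_enabled(). */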
2915 _public_
int sd_event_get_watchdog(sd_event
*e
) {
2916 assert_return(e
, -EINVAL
);
2917 assert_return(e
= event_resolve(e
), -ENOPKG
);
2918 assert_return(!event_pid_changed(e
), -ECHILD
);
2923 _public_
int sd_event_get_iteration(sd_event
*e
, uint64_t *ret
) {
2924 assert_return(e
, -EINVAL
);
2925 assert_return(e
= event_resolve(e
), -ENOPKG
);
2926 assert_return(!event_pid_changed(e
), -ECHILD
);
2928 *ret
= e
->iteration
;