1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
26 #include "sd-daemon.h"
30 #include "alloc-util.h"
38 #include "signal-util.h"
39 #include "string-util.h"
40 #include "time-util.h"
43 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
45 typedef enum EventSourceType
{
49 SOURCE_TIME_MONOTONIC
,
50 SOURCE_TIME_REALTIME_ALARM
,
51 SOURCE_TIME_BOOTTIME_ALARM
,
58 _SOURCE_EVENT_SOURCE_TYPE_MAX
,
59 _SOURCE_EVENT_SOURCE_TYPE_INVALID
= -1
62 /* All objects we use in epoll events start with this value, so that
63 * we know how to dispatch it */
64 typedef enum WakeupType
{
70 _WAKEUP_TYPE_INVALID
= -1,
73 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
75 struct sd_event_source
{
82 sd_event_handler_t prepare
;
86 EventSourceType type
:5;
93 unsigned pending_index
;
94 unsigned prepare_index
;
95 unsigned pending_iteration
;
96 unsigned prepare_iteration
;
98 LIST_FIELDS(sd_event_source
, sources
);
102 sd_event_io_handler_t callback
;
109 sd_event_time_handler_t callback
;
110 usec_t next
, accuracy
;
111 unsigned earliest_index
;
112 unsigned latest_index
;
115 sd_event_signal_handler_t callback
;
116 struct signalfd_siginfo siginfo
;
120 sd_event_child_handler_t callback
;
126 sd_event_handler_t callback
;
129 sd_event_handler_t callback
;
132 sd_event_handler_t callback
;
133 unsigned prioq_index
;
142 /* For all clocks we maintain two priority queues each, one
143 * ordered for the earliest times the events may be
144 * dispatched, and one ordered by the latest times they must
145 * have been dispatched. The range between the top entries in
146 * the two prioqs is the time window we can freely schedule
159 /* For each priority we maintain one signal fd, so that we
160 * only have to dequeue a single event per priority at a
166 sd_event_source
*current
;
178 /* timerfd_create() only supports these five clocks so far. We
179 * can add support for more clocks when the kernel learns to
180 * deal with them, too. */
181 struct clock_data realtime
;
182 struct clock_data boottime
;
183 struct clock_data monotonic
;
184 struct clock_data realtime_alarm
;
185 struct clock_data boottime_alarm
;
189 sd_event_source
**signal_sources
; /* indexed by signal number */
190 Hashmap
*signal_data
; /* indexed by priority */
192 Hashmap
*child_sources
;
193 unsigned n_enabled_child_sources
;
202 dual_timestamp timestamp
;
203 usec_t timestamp_boottime
;
206 bool exit_requested
:1;
207 bool need_process_child
:1;
213 sd_event
**default_event_ptr
;
215 usec_t watchdog_last
, watchdog_period
;
219 LIST_HEAD(sd_event_source
, sources
);
222 static void source_disconnect(sd_event_source
*s
);
224 static int pending_prioq_compare(const void *a
, const void *b
) {
225 const sd_event_source
*x
= a
, *y
= b
;
230 /* Enabled ones first */
231 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
233 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
236 /* Lower priority values first */
237 if (x
->priority
< y
->priority
)
239 if (x
->priority
> y
->priority
)
242 /* Older entries first */
243 if (x
->pending_iteration
< y
->pending_iteration
)
245 if (x
->pending_iteration
> y
->pending_iteration
)
251 static int prepare_prioq_compare(const void *a
, const void *b
) {
252 const sd_event_source
*x
= a
, *y
= b
;
257 /* Enabled ones first */
258 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
260 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
263 /* Move most recently prepared ones last, so that we can stop
264 * preparing as soon as we hit one that has already been
265 * prepared in the current iteration */
266 if (x
->prepare_iteration
< y
->prepare_iteration
)
268 if (x
->prepare_iteration
> y
->prepare_iteration
)
271 /* Lower priority values first */
272 if (x
->priority
< y
->priority
)
274 if (x
->priority
> y
->priority
)
280 static int earliest_time_prioq_compare(const void *a
, const void *b
) {
281 const sd_event_source
*x
= a
, *y
= b
;
283 assert(EVENT_SOURCE_IS_TIME(x
->type
));
284 assert(x
->type
== y
->type
);
286 /* Enabled ones first */
287 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
289 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
292 /* Move the pending ones to the end */
293 if (!x
->pending
&& y
->pending
)
295 if (x
->pending
&& !y
->pending
)
299 if (x
->time
.next
< y
->time
.next
)
301 if (x
->time
.next
> y
->time
.next
)
307 static int latest_time_prioq_compare(const void *a
, const void *b
) {
308 const sd_event_source
*x
= a
, *y
= b
;
310 assert(EVENT_SOURCE_IS_TIME(x
->type
));
311 assert(x
->type
== y
->type
);
313 /* Enabled ones first */
314 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
316 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
319 /* Move the pending ones to the end */
320 if (!x
->pending
&& y
->pending
)
322 if (x
->pending
&& !y
->pending
)
326 if (x
->time
.next
+ x
->time
.accuracy
< y
->time
.next
+ y
->time
.accuracy
)
328 if (x
->time
.next
+ x
->time
.accuracy
> y
->time
.next
+ y
->time
.accuracy
)
334 static int exit_prioq_compare(const void *a
, const void *b
) {
335 const sd_event_source
*x
= a
, *y
= b
;
337 assert(x
->type
== SOURCE_EXIT
);
338 assert(y
->type
== SOURCE_EXIT
);
340 /* Enabled ones first */
341 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
343 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
346 /* Lower priority values first */
347 if (x
->priority
< y
->priority
)
349 if (x
->priority
> y
->priority
)
355 static void free_clock_data(struct clock_data
*d
) {
357 assert(d
->wakeup
== WAKEUP_CLOCK_DATA
);
360 prioq_free(d
->earliest
);
361 prioq_free(d
->latest
);
364 static void event_free(sd_event
*e
) {
369 while ((s
= e
->sources
)) {
371 source_disconnect(s
);
372 sd_event_source_unref(s
);
375 assert(e
->n_sources
== 0);
377 if (e
->default_event_ptr
)
378 *(e
->default_event_ptr
) = NULL
;
380 safe_close(e
->epoll_fd
);
381 safe_close(e
->watchdog_fd
);
383 free_clock_data(&e
->realtime
);
384 free_clock_data(&e
->boottime
);
385 free_clock_data(&e
->monotonic
);
386 free_clock_data(&e
->realtime_alarm
);
387 free_clock_data(&e
->boottime_alarm
);
389 prioq_free(e
->pending
);
390 prioq_free(e
->prepare
);
393 free(e
->signal_sources
);
394 hashmap_free(e
->signal_data
);
396 hashmap_free(e
->child_sources
);
397 set_free(e
->post_sources
);
401 _public_
int sd_event_new(sd_event
** ret
) {
405 assert_return(ret
, -EINVAL
);
407 e
= new0(sd_event
, 1);
412 e
->watchdog_fd
= e
->epoll_fd
= e
->realtime
.fd
= e
->boottime
.fd
= e
->monotonic
.fd
= e
->realtime_alarm
.fd
= e
->boottime_alarm
.fd
= -1;
413 e
->realtime
.next
= e
->boottime
.next
= e
->monotonic
.next
= e
->realtime_alarm
.next
= e
->boottime_alarm
.next
= USEC_INFINITY
;
414 e
->realtime
.wakeup
= e
->boottime
.wakeup
= e
->monotonic
.wakeup
= e
->realtime_alarm
.wakeup
= e
->boottime_alarm
.wakeup
= WAKEUP_CLOCK_DATA
;
415 e
->original_pid
= getpid();
416 e
->perturb
= USEC_INFINITY
;
418 e
->pending
= prioq_new(pending_prioq_compare
);
424 e
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
425 if (e
->epoll_fd
< 0) {
438 _public_ sd_event
* sd_event_ref(sd_event
*e
) {
439 assert_return(e
, NULL
);
441 assert(e
->n_ref
>= 1);
447 _public_ sd_event
* sd_event_unref(sd_event
*e
) {
452 assert(e
->n_ref
>= 1);
461 static bool event_pid_changed(sd_event
*e
) {
464 /* We don't support people creating an event loop and keeping
465 * it around over a fork(). Let's complain. */
467 return e
->original_pid
!= getpid();
470 static void source_io_unregister(sd_event_source
*s
) {
474 assert(s
->type
== SOURCE_IO
);
476 if (event_pid_changed(s
->event
))
479 if (!s
->io
.registered
)
482 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, s
->io
.fd
, NULL
);
484 log_debug_errno(errno
, "Failed to remove source %s from epoll: %m", strna(s
->description
));
486 s
->io
.registered
= false;
489 static int source_io_register(
494 struct epoll_event ev
= {};
498 assert(s
->type
== SOURCE_IO
);
499 assert(enabled
!= SD_EVENT_OFF
);
504 if (enabled
== SD_EVENT_ONESHOT
)
505 ev
.events
|= EPOLLONESHOT
;
507 if (s
->io
.registered
)
508 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_MOD
, s
->io
.fd
, &ev
);
510 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_ADD
, s
->io
.fd
, &ev
);
514 s
->io
.registered
= true;
519 static clockid_t
event_source_type_to_clock(EventSourceType t
) {
523 case SOURCE_TIME_REALTIME
:
524 return CLOCK_REALTIME
;
526 case SOURCE_TIME_BOOTTIME
:
527 return CLOCK_BOOTTIME
;
529 case SOURCE_TIME_MONOTONIC
:
530 return CLOCK_MONOTONIC
;
532 case SOURCE_TIME_REALTIME_ALARM
:
533 return CLOCK_REALTIME_ALARM
;
535 case SOURCE_TIME_BOOTTIME_ALARM
:
536 return CLOCK_BOOTTIME_ALARM
;
539 return (clockid_t
) -1;
543 static EventSourceType
clock_to_event_source_type(clockid_t clock
) {
548 return SOURCE_TIME_REALTIME
;
551 return SOURCE_TIME_BOOTTIME
;
553 case CLOCK_MONOTONIC
:
554 return SOURCE_TIME_MONOTONIC
;
556 case CLOCK_REALTIME_ALARM
:
557 return SOURCE_TIME_REALTIME_ALARM
;
559 case CLOCK_BOOTTIME_ALARM
:
560 return SOURCE_TIME_BOOTTIME_ALARM
;
563 return _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
567 static struct clock_data
* event_get_clock_data(sd_event
*e
, EventSourceType t
) {
572 case SOURCE_TIME_REALTIME
:
575 case SOURCE_TIME_BOOTTIME
:
578 case SOURCE_TIME_MONOTONIC
:
579 return &e
->monotonic
;
581 case SOURCE_TIME_REALTIME_ALARM
:
582 return &e
->realtime_alarm
;
584 case SOURCE_TIME_BOOTTIME_ALARM
:
585 return &e
->boottime_alarm
;
592 static int event_make_signal_data(
595 struct signal_data
**ret
) {
597 struct epoll_event ev
= {};
598 struct signal_data
*d
;
606 if (event_pid_changed(e
))
609 if (e
->signal_sources
&& e
->signal_sources
[sig
])
610 priority
= e
->signal_sources
[sig
]->priority
;
614 d
= hashmap_get(e
->signal_data
, &priority
);
616 if (sigismember(&d
->sigset
, sig
) > 0) {
622 r
= hashmap_ensure_allocated(&e
->signal_data
, &uint64_hash_ops
);
626 d
= new0(struct signal_data
, 1);
630 d
->wakeup
= WAKEUP_SIGNAL_DATA
;
632 d
->priority
= priority
;
634 r
= hashmap_put(e
->signal_data
, &d
->priority
, d
);
642 assert_se(sigaddset(&ss_copy
, sig
) >= 0);
644 r
= signalfd(d
->fd
, &ss_copy
, SFD_NONBLOCK
|SFD_CLOEXEC
);
663 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, d
->fd
, &ev
);
676 d
->fd
= safe_close(d
->fd
);
677 hashmap_remove(e
->signal_data
, &d
->priority
);
684 static void event_unmask_signal_data(sd_event
*e
, struct signal_data
*d
, int sig
) {
688 /* Turns off the specified signal in the signal data
689 * object. If the signal mask of the object becomes empty that
692 if (sigismember(&d
->sigset
, sig
) == 0)
695 assert_se(sigdelset(&d
->sigset
, sig
) >= 0);
697 if (sigisemptyset(&d
->sigset
)) {
699 /* If the mask is all-zero we can get rid of the structure */
700 hashmap_remove(e
->signal_data
, &d
->priority
);
709 if (signalfd(d
->fd
, &d
->sigset
, SFD_NONBLOCK
|SFD_CLOEXEC
) < 0)
710 log_debug_errno(errno
, "Failed to unset signal bit, ignoring: %m");
713 static void event_gc_signal_data(sd_event
*e
, const int64_t *priority
, int sig
) {
714 struct signal_data
*d
;
715 static const int64_t zero_priority
= 0;
719 /* Rechecks if the specified signal is still something we are
720 * interested in. If not, we'll unmask it, and possibly drop
721 * the signalfd for it. */
723 if (sig
== SIGCHLD
&&
724 e
->n_enabled_child_sources
> 0)
727 if (e
->signal_sources
&&
728 e
->signal_sources
[sig
] &&
729 e
->signal_sources
[sig
]->enabled
!= SD_EVENT_OFF
)
733 * The specified signal might be enabled in three different queues:
735 * 1) the one that belongs to the priority passed (if it is non-NULL)
736 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
737 * 3) the 0 priority (to cover the SIGCHLD case)
739 * Hence, let's remove it from all three here.
743 d
= hashmap_get(e
->signal_data
, priority
);
745 event_unmask_signal_data(e
, d
, sig
);
748 if (e
->signal_sources
&& e
->signal_sources
[sig
]) {
749 d
= hashmap_get(e
->signal_data
, &e
->signal_sources
[sig
]->priority
);
751 event_unmask_signal_data(e
, d
, sig
);
754 d
= hashmap_get(e
->signal_data
, &zero_priority
);
756 event_unmask_signal_data(e
, d
, sig
);
759 static void source_disconnect(sd_event_source
*s
) {
767 assert(s
->event
->n_sources
> 0);
773 source_io_unregister(s
);
777 case SOURCE_TIME_REALTIME
:
778 case SOURCE_TIME_BOOTTIME
:
779 case SOURCE_TIME_MONOTONIC
:
780 case SOURCE_TIME_REALTIME_ALARM
:
781 case SOURCE_TIME_BOOTTIME_ALARM
: {
782 struct clock_data
*d
;
784 d
= event_get_clock_data(s
->event
, s
->type
);
787 prioq_remove(d
->earliest
, s
, &s
->time
.earliest_index
);
788 prioq_remove(d
->latest
, s
, &s
->time
.latest_index
);
789 d
->needs_rearm
= true;
794 if (s
->signal
.sig
> 0) {
796 if (s
->event
->signal_sources
)
797 s
->event
->signal_sources
[s
->signal
.sig
] = NULL
;
799 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
805 if (s
->child
.pid
> 0) {
806 if (s
->enabled
!= SD_EVENT_OFF
) {
807 assert(s
->event
->n_enabled_child_sources
> 0);
808 s
->event
->n_enabled_child_sources
--;
811 (void) hashmap_remove(s
->event
->child_sources
, INT_TO_PTR(s
->child
.pid
));
812 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
822 set_remove(s
->event
->post_sources
, s
);
826 prioq_remove(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
830 assert_not_reached("Wut? I shouldn't exist.");
834 prioq_remove(s
->event
->pending
, s
, &s
->pending_index
);
837 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
841 s
->type
= _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
843 LIST_REMOVE(sources
, event
->sources
, s
);
847 sd_event_unref(event
);
850 static void source_free(sd_event_source
*s
) {
853 source_disconnect(s
);
854 free(s
->description
);
858 static int source_set_pending(sd_event_source
*s
, bool b
) {
862 assert(s
->type
!= SOURCE_EXIT
);
870 s
->pending_iteration
= s
->event
->iteration
;
872 r
= prioq_put(s
->event
->pending
, s
, &s
->pending_index
);
878 assert_se(prioq_remove(s
->event
->pending
, s
, &s
->pending_index
));
880 if (EVENT_SOURCE_IS_TIME(s
->type
)) {
881 struct clock_data
*d
;
883 d
= event_get_clock_data(s
->event
, s
->type
);
886 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
887 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
888 d
->needs_rearm
= true;
891 if (s
->type
== SOURCE_SIGNAL
&& !b
) {
892 struct signal_data
*d
;
894 d
= hashmap_get(s
->event
->signal_data
, &s
->priority
);
895 if (d
&& d
->current
== s
)
902 static sd_event_source
*source_new(sd_event
*e
, bool floating
, EventSourceType type
) {
907 s
= new0(sd_event_source
, 1);
913 s
->floating
= floating
;
915 s
->pending_index
= s
->prepare_index
= PRIOQ_IDX_NULL
;
920 LIST_PREPEND(sources
, e
->sources
, s
);
926 _public_
int sd_event_add_io(
928 sd_event_source
**ret
,
931 sd_event_io_handler_t callback
,
937 assert_return(e
, -EINVAL
);
938 assert_return(fd
>= 0, -EBADF
);
939 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
940 assert_return(callback
, -EINVAL
);
941 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
942 assert_return(!event_pid_changed(e
), -ECHILD
);
944 s
= source_new(e
, !ret
, SOURCE_IO
);
948 s
->wakeup
= WAKEUP_EVENT_SOURCE
;
950 s
->io
.events
= events
;
951 s
->io
.callback
= callback
;
952 s
->userdata
= userdata
;
953 s
->enabled
= SD_EVENT_ON
;
955 r
= source_io_register(s
, s
->enabled
, events
);
967 static void initialize_perturb(sd_event
*e
) {
968 sd_id128_t bootid
= {};
970 /* When we sleep for longer, we try to realign the wakeup to
971 the same time within each minute/second/250ms, so that
972 events all across the system can be coalesced into a single
973 CPU wakeup. However, let's take some system-specific
974 randomness for this value, so that in a network of systems
975 with synced clocks timer events are distributed a
976 bit. Here, we calculate a perturbation usec offset from the
979 if (_likely_(e
->perturb
!= USEC_INFINITY
))
982 if (sd_id128_get_boot(&bootid
) >= 0)
983 e
->perturb
= (bootid
.qwords
[0] ^ bootid
.qwords
[1]) % USEC_PER_MINUTE
;
986 static int event_setup_timer_fd(
988 struct clock_data
*d
,
991 struct epoll_event ev
= {};
997 if (_likely_(d
->fd
>= 0))
1000 fd
= timerfd_create(clock
, TFD_NONBLOCK
|TFD_CLOEXEC
);
1004 ev
.events
= EPOLLIN
;
1007 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
);
1017 static int time_exit_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
1020 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1023 _public_
int sd_event_add_time(
1025 sd_event_source
**ret
,
1029 sd_event_time_handler_t callback
,
1032 EventSourceType type
;
1034 struct clock_data
*d
;
1037 assert_return(e
, -EINVAL
);
1038 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1039 assert_return(accuracy
!= (uint64_t) -1, -EINVAL
);
1040 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1041 assert_return(!event_pid_changed(e
), -ECHILD
);
1044 callback
= time_exit_callback
;
1046 type
= clock_to_event_source_type(clock
);
1047 assert_return(type
>= 0, -EOPNOTSUPP
);
1049 d
= event_get_clock_data(e
, type
);
1053 d
->earliest
= prioq_new(earliest_time_prioq_compare
);
1059 d
->latest
= prioq_new(latest_time_prioq_compare
);
1065 r
= event_setup_timer_fd(e
, d
, clock
);
1070 s
= source_new(e
, !ret
, type
);
1074 s
->time
.next
= usec
;
1075 s
->time
.accuracy
= accuracy
== 0 ? DEFAULT_ACCURACY_USEC
: accuracy
;
1076 s
->time
.callback
= callback
;
1077 s
->time
.earliest_index
= s
->time
.latest_index
= PRIOQ_IDX_NULL
;
1078 s
->userdata
= userdata
;
1079 s
->enabled
= SD_EVENT_ONESHOT
;
1081 d
->needs_rearm
= true;
1083 r
= prioq_put(d
->earliest
, s
, &s
->time
.earliest_index
);
1087 r
= prioq_put(d
->latest
, s
, &s
->time
.latest_index
);
1101 static int signal_exit_callback(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1104 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1107 _public_
int sd_event_add_signal(
1109 sd_event_source
**ret
,
1111 sd_event_signal_handler_t callback
,
1115 struct signal_data
*d
;
1119 assert_return(e
, -EINVAL
);
1120 assert_return(sig
> 0, -EINVAL
);
1121 assert_return(sig
< _NSIG
, -EINVAL
);
1122 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1123 assert_return(!event_pid_changed(e
), -ECHILD
);
1126 callback
= signal_exit_callback
;
1128 r
= pthread_sigmask(SIG_SETMASK
, NULL
, &ss
);
1132 if (!sigismember(&ss
, sig
))
1135 if (!e
->signal_sources
) {
1136 e
->signal_sources
= new0(sd_event_source
*, _NSIG
);
1137 if (!e
->signal_sources
)
1139 } else if (e
->signal_sources
[sig
])
1142 s
= source_new(e
, !ret
, SOURCE_SIGNAL
);
1146 s
->signal
.sig
= sig
;
1147 s
->signal
.callback
= callback
;
1148 s
->userdata
= userdata
;
1149 s
->enabled
= SD_EVENT_ON
;
1151 e
->signal_sources
[sig
] = s
;
1153 r
= event_make_signal_data(e
, sig
, &d
);
1159 /* Use the signal name as description for the event source by default */
1160 (void) sd_event_source_set_description(s
, signal_to_string(sig
));
1168 _public_
int sd_event_add_child(
1170 sd_event_source
**ret
,
1173 sd_event_child_handler_t callback
,
1179 assert_return(e
, -EINVAL
);
1180 assert_return(pid
> 1, -EINVAL
);
1181 assert_return(!(options
& ~(WEXITED
|WSTOPPED
|WCONTINUED
)), -EINVAL
);
1182 assert_return(options
!= 0, -EINVAL
);
1183 assert_return(callback
, -EINVAL
);
1184 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1185 assert_return(!event_pid_changed(e
), -ECHILD
);
1187 r
= hashmap_ensure_allocated(&e
->child_sources
, NULL
);
1191 if (hashmap_contains(e
->child_sources
, INT_TO_PTR(pid
)))
1194 s
= source_new(e
, !ret
, SOURCE_CHILD
);
1199 s
->child
.options
= options
;
1200 s
->child
.callback
= callback
;
1201 s
->userdata
= userdata
;
1202 s
->enabled
= SD_EVENT_ONESHOT
;
1204 r
= hashmap_put(e
->child_sources
, INT_TO_PTR(pid
), s
);
1210 e
->n_enabled_child_sources
++;
1212 r
= event_make_signal_data(e
, SIGCHLD
, NULL
);
1214 e
->n_enabled_child_sources
--;
1219 e
->need_process_child
= true;
1227 _public_
int sd_event_add_defer(
1229 sd_event_source
**ret
,
1230 sd_event_handler_t callback
,
1236 assert_return(e
, -EINVAL
);
1237 assert_return(callback
, -EINVAL
);
1238 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1239 assert_return(!event_pid_changed(e
), -ECHILD
);
1241 s
= source_new(e
, !ret
, SOURCE_DEFER
);
1245 s
->defer
.callback
= callback
;
1246 s
->userdata
= userdata
;
1247 s
->enabled
= SD_EVENT_ONESHOT
;
1249 r
= source_set_pending(s
, true);
1261 _public_
int sd_event_add_post(
1263 sd_event_source
**ret
,
1264 sd_event_handler_t callback
,
1270 assert_return(e
, -EINVAL
);
1271 assert_return(callback
, -EINVAL
);
1272 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1273 assert_return(!event_pid_changed(e
), -ECHILD
);
1275 r
= set_ensure_allocated(&e
->post_sources
, NULL
);
1279 s
= source_new(e
, !ret
, SOURCE_POST
);
1283 s
->post
.callback
= callback
;
1284 s
->userdata
= userdata
;
1285 s
->enabled
= SD_EVENT_ON
;
1287 r
= set_put(e
->post_sources
, s
);
1299 _public_
int sd_event_add_exit(
1301 sd_event_source
**ret
,
1302 sd_event_handler_t callback
,
1308 assert_return(e
, -EINVAL
);
1309 assert_return(callback
, -EINVAL
);
1310 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1311 assert_return(!event_pid_changed(e
), -ECHILD
);
1314 e
->exit
= prioq_new(exit_prioq_compare
);
1319 s
= source_new(e
, !ret
, SOURCE_EXIT
);
1323 s
->exit
.callback
= callback
;
1324 s
->userdata
= userdata
;
1325 s
->exit
.prioq_index
= PRIOQ_IDX_NULL
;
1326 s
->enabled
= SD_EVENT_ONESHOT
;
1328 r
= prioq_put(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1340 _public_ sd_event_source
* sd_event_source_ref(sd_event_source
*s
) {
1341 assert_return(s
, NULL
);
1343 assert(s
->n_ref
>= 1);
1349 _public_ sd_event_source
* sd_event_source_unref(sd_event_source
*s
) {
1354 assert(s
->n_ref
>= 1);
1357 if (s
->n_ref
<= 0) {
1358 /* Here's a special hack: when we are called from a
1359 * dispatch handler we won't free the event source
1360 * immediately, but we will detach the fd from the
1361 * epoll. This way it is safe for the caller to unref
1362 * the event source and immediately close the fd, but
1363 * we still retain a valid event source object after
1366 if (s
->dispatching
) {
1367 if (s
->type
== SOURCE_IO
)
1368 source_io_unregister(s
);
1370 source_disconnect(s
);
1378 _public_
int sd_event_source_set_description(sd_event_source
*s
, const char *description
) {
1379 assert_return(s
, -EINVAL
);
1380 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1382 return free_and_strdup(&s
->description
, description
);
1385 _public_
int sd_event_source_get_description(sd_event_source
*s
, const char **description
) {
1386 assert_return(s
, -EINVAL
);
1387 assert_return(description
, -EINVAL
);
1388 assert_return(s
->description
, -ENXIO
);
1389 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1391 *description
= s
->description
;
1395 _public_ sd_event
*sd_event_source_get_event(sd_event_source
*s
) {
1396 assert_return(s
, NULL
);
1401 _public_
int sd_event_source_get_pending(sd_event_source
*s
) {
1402 assert_return(s
, -EINVAL
);
1403 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1404 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1405 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1410 _public_
int sd_event_source_get_io_fd(sd_event_source
*s
) {
1411 assert_return(s
, -EINVAL
);
1412 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1413 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1418 _public_
int sd_event_source_set_io_fd(sd_event_source
*s
, int fd
) {
1421 assert_return(s
, -EINVAL
);
1422 assert_return(fd
>= 0, -EBADF
);
1423 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1424 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1429 if (s
->enabled
== SD_EVENT_OFF
) {
1431 s
->io
.registered
= false;
1435 saved_fd
= s
->io
.fd
;
1436 assert(s
->io
.registered
);
1439 s
->io
.registered
= false;
1441 r
= source_io_register(s
, s
->enabled
, s
->io
.events
);
1443 s
->io
.fd
= saved_fd
;
1444 s
->io
.registered
= true;
1448 epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, saved_fd
, NULL
);
1454 _public_
int sd_event_source_get_io_events(sd_event_source
*s
, uint32_t* events
) {
1455 assert_return(s
, -EINVAL
);
1456 assert_return(events
, -EINVAL
);
1457 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1458 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1460 *events
= s
->io
.events
;
1464 _public_
int sd_event_source_set_io_events(sd_event_source
*s
, uint32_t events
) {
1467 assert_return(s
, -EINVAL
);
1468 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1469 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
1470 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1471 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1473 /* edge-triggered updates are never skipped, so we can reset edges */
1474 if (s
->io
.events
== events
&& !(events
& EPOLLET
))
1477 if (s
->enabled
!= SD_EVENT_OFF
) {
1478 r
= source_io_register(s
, s
->enabled
, events
);
1483 s
->io
.events
= events
;
1484 source_set_pending(s
, false);
1489 _public_
int sd_event_source_get_io_revents(sd_event_source
*s
, uint32_t* revents
) {
1490 assert_return(s
, -EINVAL
);
1491 assert_return(revents
, -EINVAL
);
1492 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1493 assert_return(s
->pending
, -ENODATA
);
1494 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1496 *revents
= s
->io
.revents
;
1500 _public_
int sd_event_source_get_signal(sd_event_source
*s
) {
1501 assert_return(s
, -EINVAL
);
1502 assert_return(s
->type
== SOURCE_SIGNAL
, -EDOM
);
1503 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1505 return s
->signal
.sig
;
1508 _public_
int sd_event_source_get_priority(sd_event_source
*s
, int64_t *priority
) {
1509 assert_return(s
, -EINVAL
);
1510 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1515 _public_
int sd_event_source_set_priority(sd_event_source
*s
, int64_t priority
) {
1518 assert_return(s
, -EINVAL
);
1519 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1520 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1522 if (s
->priority
== priority
)
1525 if (s
->type
== SOURCE_SIGNAL
&& s
->enabled
!= SD_EVENT_OFF
) {
1526 struct signal_data
*old
, *d
;
1528 /* Move us from the signalfd belonging to the old
1529 * priority to the signalfd of the new priority */
1531 assert_se(old
= hashmap_get(s
->event
->signal_data
, &s
->priority
));
1533 s
->priority
= priority
;
1535 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, &d
);
1537 s
->priority
= old
->priority
;
1541 event_unmask_signal_data(s
->event
, old
, s
->signal
.sig
);
1543 s
->priority
= priority
;
1546 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1549 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1551 if (s
->type
== SOURCE_EXIT
)
1552 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1557 _public_
int sd_event_source_get_enabled(sd_event_source
*s
, int *m
) {
1558 assert_return(s
, -EINVAL
);
1559 assert_return(m
, -EINVAL
);
1560 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1566 _public_
int sd_event_source_set_enabled(sd_event_source
*s
, int m
) {
1569 assert_return(s
, -EINVAL
);
1570 assert_return(m
== SD_EVENT_OFF
|| m
== SD_EVENT_ON
|| m
== SD_EVENT_ONESHOT
, -EINVAL
);
1571 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1573 /* If we are dead anyway, we are fine with turning off
1574 * sources, but everything else needs to fail. */
1575 if (s
->event
->state
== SD_EVENT_FINISHED
)
1576 return m
== SD_EVENT_OFF
? 0 : -ESTALE
;
1578 if (s
->enabled
== m
)
1581 if (m
== SD_EVENT_OFF
) {
1586 source_io_unregister(s
);
1590 case SOURCE_TIME_REALTIME
:
1591 case SOURCE_TIME_BOOTTIME
:
1592 case SOURCE_TIME_MONOTONIC
:
1593 case SOURCE_TIME_REALTIME_ALARM
:
1594 case SOURCE_TIME_BOOTTIME_ALARM
: {
1595 struct clock_data
*d
;
1598 d
= event_get_clock_data(s
->event
, s
->type
);
1601 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1602 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1603 d
->needs_rearm
= true;
1610 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1616 assert(s
->event
->n_enabled_child_sources
> 0);
1617 s
->event
->n_enabled_child_sources
--;
1619 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1624 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1633 assert_not_reached("Wut? I shouldn't exist.");
1640 r
= source_io_register(s
, m
, s
->io
.events
);
1647 case SOURCE_TIME_REALTIME
:
1648 case SOURCE_TIME_BOOTTIME
:
1649 case SOURCE_TIME_MONOTONIC
:
1650 case SOURCE_TIME_REALTIME_ALARM
:
1651 case SOURCE_TIME_BOOTTIME_ALARM
: {
1652 struct clock_data
*d
;
1655 d
= event_get_clock_data(s
->event
, s
->type
);
1658 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1659 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1660 d
->needs_rearm
= true;
1668 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, NULL
);
1670 s
->enabled
= SD_EVENT_OFF
;
1671 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1679 if (s
->enabled
== SD_EVENT_OFF
)
1680 s
->event
->n_enabled_child_sources
++;
1684 r
= event_make_signal_data(s
->event
, SIGCHLD
, NULL
);
1686 s
->enabled
= SD_EVENT_OFF
;
1687 s
->event
->n_enabled_child_sources
--;
1688 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1696 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1705 assert_not_reached("Wut? I shouldn't exist.");
1710 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1713 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1718 _public_
int sd_event_source_get_time(sd_event_source
*s
, uint64_t *usec
) {
1719 assert_return(s
, -EINVAL
);
1720 assert_return(usec
, -EINVAL
);
1721 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1722 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1724 *usec
= s
->time
.next
;
1728 _public_
int sd_event_source_set_time(sd_event_source
*s
, uint64_t usec
) {
1729 struct clock_data
*d
;
1731 assert_return(s
, -EINVAL
);
1732 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1733 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1734 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1735 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1737 s
->time
.next
= usec
;
1739 source_set_pending(s
, false);
1741 d
= event_get_clock_data(s
->event
, s
->type
);
1744 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1745 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1746 d
->needs_rearm
= true;
/* Reports the accuracy value stored on a timer event source.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   s non-NULL (-EINVAL); usec non-NULL (-EINVAL); s->type is a time type (-EDOM);
 *   event_pid_changed() is false for the owning loop (-ECHILD).
 *
 * On the path shown, *usec receives s->time.accuracy.
 * NOTE(review): the return statement is not visible in this excerpt. */
1751 _public_
int sd_event_source_get_time_accuracy(sd_event_source
*s
, uint64_t *usec
) {
1752 assert_return(s
, -EINVAL
);
1753 assert_return(usec
, -EINVAL
);
1754 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1755 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1757 *usec
= s
->time
.accuracy
;
/* Changes the accuracy of a timer event source and flags its clock for re-arming.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   s non-NULL (-EINVAL); usec != (uint64_t) -1 (-EINVAL); s->type is a time type
 *   (-EDOM); the loop is not in SD_EVENT_FINISHED (-ESTALE); event_pid_changed()
 *   is false (-ECHILD).
 *
 * Effects visible here: stores the (possibly defaulted) value in s->time.accuracy,
 * calls source_set_pending(s, false), then repositions s in the latest queue only
 * and sets d->needs_rearm.
 * NOTE(review): the return statement is not visible in this excerpt. */
1761 _public_
int sd_event_source_set_time_accuracy(sd_event_source
*s
, uint64_t usec
) {
1762 struct clock_data
*d
;
1764 assert_return(s
, -EINVAL
);
1765 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1766 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1767 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1768 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
/* NOTE(review): the condition guarding this substitution of DEFAULT_ACCURACY_USEC
 * is on a line not visible in this excerpt (presumably usec == 0 - confirm). */
1771 usec
= DEFAULT_ACCURACY_USEC
;
1773 s
->time
.accuracy
= usec
;
1775 source_set_pending(s
, false);
1777 d
= event_get_clock_data(s
->event
, s
->type
);
/* Only the latest queue is reshuffled here; the earliest queue is left untouched. */
1780 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1781 d
->needs_rearm
= true;
/* Reports which clock a timer event source is bound to.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   s non-NULL (-EINVAL); clock non-NULL (-EINVAL); s->type is a time type (-EDOM);
 *   event_pid_changed() is false for the owning loop (-ECHILD).
 *
 * On the path shown, *clock receives event_source_type_to_clock(s->type).
 * NOTE(review): the return statement is not visible in this excerpt. */
1786 _public_
int sd_event_source_get_time_clock(sd_event_source
*s
, clockid_t
*clock
) {
1787 assert_return(s
, -EINVAL
);
1788 assert_return(clock
, -EINVAL
);
1789 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1790 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1792 *clock
= event_source_type_to_clock(s
->type
);
/* Reports the PID watched by a child event source.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   s non-NULL (-EINVAL); pid non-NULL (-EINVAL); s->type == SOURCE_CHILD (-EDOM);
 *   event_pid_changed() is false for the owning loop (-ECHILD).
 *
 * On the path shown, *pid receives s->child.pid.
 * NOTE(review): the return statement is not visible in this excerpt. */
1796 _public_
int sd_event_source_get_child_pid(sd_event_source
*s
, pid_t
*pid
) {
1797 assert_return(s
, -EINVAL
);
1798 assert_return(pid
, -EINVAL
);
1799 assert_return(s
->type
== SOURCE_CHILD
, -EDOM
);
1800 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1802 *pid
= s
->child
.pid
;
/* Installs, replaces or removes the prepare callback of an event source.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   s non-NULL (-EINVAL); s->type != SOURCE_EXIT (-EDOM); the loop is not in
 *   SD_EVENT_FINISHED (-ESTALE); event_pid_changed() is false (-ECHILD).
 *
 * Cases visible here:
 *   - s->prepare already equals callback: handled by an early branch whose body is
 *     not visible in this excerpt.
 *   - both the old and the new callback are non-NULL: only the pointer is swapped,
 *     so queue membership is unchanged.
 *   - otherwise the prepare queue of the loop is allocated on demand
 *     (prioq_ensure_allocated with prepare_prioq_compare), the pointer is stored, and
 *     the source is either inserted with prioq_put or dropped with prioq_remove.
 * NOTE(review): the declaration of r, the checks on r, the branch selecting between
 * prioq_put and prioq_remove, and all return statements are not visible here. */
1806 _public_
int sd_event_source_set_prepare(sd_event_source
*s
, sd_event_handler_t callback
) {
1809 assert_return(s
, -EINVAL
);
1810 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1811 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1812 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1814 if (s
->prepare
== callback
)
1817 if (callback
&& s
->prepare
) {
1818 s
->prepare
= callback
;
1822 r
= prioq_ensure_allocated(&s
->event
->prepare
, prepare_prioq_compare
);
1826 s
->prepare
= callback
;
1829 r
= prioq_put(s
->event
->prepare
, s
, &s
->prepare_index
);
1833 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
/* Accessor for the userdata pointer attached to an event source.
 *
 * A NULL s is rejected through assert_return with NULL as the result.
 * NOTE(review): the return statement is not visible in this excerpt; presumably it
 * yields s->userdata - confirm. */
1838 _public_
void* sd_event_source_get_userdata(sd_event_source
*s
) {
1839 assert_return(s
, NULL
);
/* Replaces the userdata pointer attached to an event source.
 *
 * A NULL s is rejected through assert_return with NULL as the result. On the path
 * shown, s->userdata is overwritten with the userdata argument.
 * NOTE(review): the value returned on success is not visible in this excerpt
 * (possibly the previous pointer - confirm). */
1844 _public_
void *sd_event_source_set_userdata(sd_event_source
*s
, void *userdata
) {
1847 assert_return(s
, NULL
);
1850 s
->userdata
= userdata
;
/* Picks a wake-up time between a and b (both usec_t); the rationale is given in the
 * prose embedded in the body below.
 *
 * Structure visible here: after initialize_perturb(e), four candidates c are computed
 * from coarse to fine granularity (one minute, 10 s, 1 s, 250 ms). Each candidate
 * rounds b down to a multiple of the granularity and adds e->perturb (taken modulo the
 * granularity for the three finer steps). Each step also tests, in an _unlikely_
 * branch, whether c is smaller than one granularity unit, and where shown the
 * granularity is then subtracted from c.
 * NOTE(review): the declaration of c, the comparisons that accept or reject each
 * candidate, the bodies of the _unlikely_ branches, and every return statement are
 * not visible in this excerpt. */
1855 static usec_t
sleep_between(sd_event
*e
, usec_t a
, usec_t b
) {
1866 initialize_perturb(e
);
1869 Find a good time to wake up again between times a and b. We
1870 have two goals here:
1872 a) We want to wake up as seldom as possible, hence prefer
1873 later times over earlier times.
1875 b) But if we have to wake up, then let's make sure to
1876 dispatch as much as possible on the entire system.
1878 We implement this by waking up everywhere at the same time
1879 within any given minute if we can, synchronised via the
1880 perturbation value determined from the boot ID. If we can't,
1881 then we try to find the same spot in every 10s, then 1s and
1882 then 250ms step. Otherwise, we pick the last possible time
1886 c
= (b
/ USEC_PER_MINUTE
) * USEC_PER_MINUTE
+ e
->perturb
;
1888 if (_unlikely_(c
< USEC_PER_MINUTE
))
1891 c
-= USEC_PER_MINUTE
;
1897 c
= (b
/ (USEC_PER_SEC
*10)) * (USEC_PER_SEC
*10) + (e
->perturb
% (USEC_PER_SEC
*10));
1899 if (_unlikely_(c
< USEC_PER_SEC
*10))
1902 c
-= USEC_PER_SEC
*10;
1908 c
= (b
/ USEC_PER_SEC
) * USEC_PER_SEC
+ (e
->perturb
% USEC_PER_SEC
);
1910 if (_unlikely_(c
< USEC_PER_SEC
))
1919 c
= (b
/ (USEC_PER_MSEC
*250)) * (USEC_PER_MSEC
*250) + (e
->perturb
% (USEC_PER_MSEC
*250));
1921 if (_unlikely_(c
< USEC_PER_MSEC
*250))
1924 c
-= USEC_PER_MSEC
*250;
/* Programs the timerfd of one clock so that it fires for the next due timer source.
 *
 * d: per-clock state (priority queues earliest/latest, timer fd, cached next value,
 *    needs_rearm flag). A further parameter e is used in the body; its declaration is
 *    on a line not visible in this excerpt.
 *
 * Flow visible here:
 *   - Nothing is done unless d->needs_rearm is set; the flag is then cleared.
 *   - If the earliest queue is empty or its head is disabled, the timer is disarmed
 *     by passing the zero-initialised its to timerfd_settime (skipped when d->next is
 *     already USEC_INFINITY) and d->next becomes USEC_INFINITY.
 *   - Otherwise the head of the latest queue must exist and be enabled (assert_se),
 *     a wake-up time t is chosen with sleep_between(), and the timer is armed with
 *     TFD_TIMER_ABSTIME.
 * NOTE(review): declarations of r and t, the checks on r, the condition selecting
 * between the 1 ns placeholder and timespec_store(), and all return statements are
 * not visible in this excerpt. */
1933 static int event_arm_timer(
1935 struct clock_data
*d
) {
1937 struct itimerspec its
= {};
1938 sd_event_source
*a
, *b
;
1945 if (!d
->needs_rearm
)
1948 d
->needs_rearm
= false;
1950 a
= prioq_peek(d
->earliest
);
1951 if (!a
|| a
->enabled
== SD_EVENT_OFF
) {
1956 if (d
->next
== USEC_INFINITY
)
1960 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
1964 d
->next
= USEC_INFINITY
;
1968 b
= prioq_peek(d
->latest
);
1969 assert_se(b
&& b
->enabled
!= SD_EVENT_OFF
);
/* NOTE(review): b->time.next + b->time.accuracy is a plain sum; if usec_t is unsigned
 * this wraps when next is close to the maximum value - confirm that callers bound
 * these values or that a saturating add is not needed here. */
1971 t
= sleep_between(e
, a
->time
.next
, b
->time
.next
+ b
->time
.accuracy
);
1975 assert_se(d
->fd
>= 0);
1978 /* We don't want to disarm here, just mean some time looooong ago. */
1979 its
.it_value
.tv_sec
= 0;
1980 its
.it_value
.tv_nsec
= 1;
1982 timespec_store(&its
.it_value
, t
);
1984 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
/* Records the epoll events reported for an I/O event source and marks it pending.
 *
 * e:       owning loop.
 * s:       event source; must be of type SOURCE_IO (assert).
 * revents: event mask reported by epoll.
 *
 * Both an OR-merge and a plain assignment into s->io.revents are visible; per the
 * comment in the body the merge applies when the source was already pending. The
 * condition selecting between the two is on lines not visible in this excerpt.
 * Returns whatever source_set_pending(s, true) returns. */
1992 static int process_io(sd_event
*e
, sd_event_source
*s
, uint32_t revents
) {
1995 assert(s
->type
== SOURCE_IO
);
1997 /* If the event source was already pending, we just OR in the
1998 * new revents, otherwise we reset the value. The ORing is
1999 * necessary to handle EPOLLONESHOT events properly where
2000 * readability might happen independently of writability, and
2001 * we need to keep track of both */
2004 s
->io
.revents
|= revents
;
2006 s
->io
.revents
= revents
;
2008 return source_set_pending(s
, true);
/* Drains a timer file descriptor after epoll reported it ready.
 *
 * e:      owning loop.
 * fd:     timer fd to read from.
 * events: epoll event mask; anything other than exactly EPOLLIN is rejected with
 *         -EIO through assert_return.
 * next:   location that is reset to USEC_INFINITY on the path shown.
 *
 * The body reads sizeof(x) bytes from fd into x. The visible checks single out errno
 * values EAGAIN and EINTR, and a read length different from sizeof(x).
 * NOTE(review): a caller in this file passes NULL for next (the watchdog case), so a
 * NULL guard presumably sits on a line not visible here - confirm. Declarations of x
 * and ss, the failure check on read(), and all return statements are not visible. */
2011 static int flush_timer(sd_event
*e
, int fd
, uint32_t events
, usec_t
*next
) {
2018 assert_return(events
== EPOLLIN
, -EIO
);
2020 ss
= read(fd
, &x
, sizeof(x
));
2022 if (errno
== EAGAIN
|| errno
== EINTR
)
2028 if (_unlikely_(ss
!= sizeof(x
)))
2032 *next
= USEC_INFINITY
;
/* Marks timer event sources of one clock as pending.
 *
 * d: per-clock state. Callers in this file additionally pass the loop and a
 *    timestamp ahead of d; those parameter declarations are on lines not visible in
 *    this excerpt.
 *
 * Steps visible here: peek the head s of d->earliest; evaluate a compound condition
 * that includes s->enabled == SD_EVENT_OFF (its other terms and the resulting action
 * are not visible); call source_set_pending(s, true); reposition s in both the
 * earliest and latest queue; set d->needs_rearm.
 * NOTE(review): presumably this repeats until the queue head is no longer due - the
 * loop construct, the checks on r, and the return statements are not visible here;
 * confirm against the full file. */
2037 static int process_timer(
2040 struct clock_data
*d
) {
2049 s
= prioq_peek(d
->earliest
);
2052 s
->enabled
== SD_EVENT_OFF
||
2056 r
= source_set_pending(s
, true);
2060 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
2061 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2062 d
->needs_rearm
= true;
/* Polls every registered child event source for a state change and marks the ones
 * that have news as pending. The design rationale is in the prose embedded below.
 *
 * Steps visible here:
 *   - e->need_process_child is cleared up front.
 *   - HASHMAP_FOREACH walks e->child_sources; every entry must be SOURCE_CHILD (assert).
 *   - Sources with enabled == SD_EVENT_OFF are tested for; the resulting action is on
 *     a line not visible here (presumably they are skipped - confirm).
 *   - The stored siginfo is zeroed and waitid(P_PID, ...) is called with WNOHANG plus
 *     the options of the source; WNOWAIT is added when WEXITED is among those options.
 *   - A non-zero si_pid is treated as a reported change. An expression over si_code
 *     (CLD_EXITED, CLD_KILLED or CLD_DUMPED) is computed and, going by the test that
 *     follows, assigned to zombie.
 *   - For a change that is not a zombie while WEXITED is set, a second waitid without
 *     WNOWAIT removes the stop/continue notification from the kernel queue.
 *   - source_set_pending(s, true) is then called.
 * NOTE(review): declarations, the checks on r, and the return statements are not
 * visible in this excerpt. */
2068 static int process_child(sd_event
*e
) {
2075 e
->need_process_child
= false;
2078 So, this is ugly. We iteratively invoke waitid() with P_PID
2079 + WNOHANG for each PID we wait for, instead of using
2080 P_ALL. This is because we only want to get child
2081 information of very specific child processes, and not all
2082 of them. We might not have processed the SIGCHLD even of a
2083 previous invocation and we don't want to maintain a
2084 unbounded *per-child* event queue, hence we really don't
2085 want anything flushed out of the kernel's queue that we
2086 don't care about. Since this is O(n) this means that if you
2087 have a lot of processes you probably want to handle SIGCHLD
2090 We do not reap the children here (by using WNOWAIT), this
2091 is only done after the event source is dispatched so that
2092 the callback still sees the process as a zombie.
2095 HASHMAP_FOREACH(s
, e
->child_sources
, i
) {
2096 assert(s
->type
== SOURCE_CHILD
);
2101 if (s
->enabled
== SD_EVENT_OFF
)
2104 zero(s
->child
.siginfo
);
2105 r
= waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
,
2106 WNOHANG
| (s
->child
.options
& WEXITED
? WNOWAIT
: 0) | s
->child
.options
);
2110 if (s
->child
.siginfo
.si_pid
!= 0) {
2112 s
->child
.siginfo
.si_code
== CLD_EXITED
||
2113 s
->child
.siginfo
.si_code
== CLD_KILLED
||
2114 s
->child
.siginfo
.si_code
== CLD_DUMPED
;
2116 if (!zombie
&& (s
->child
.options
& WEXITED
)) {
2117 /* If the child isn't dead then let's
2118 * immediately remove the state change
2119 * from the queue, since there's no
2120 * benefit in leaving it queued */
2122 assert(s
->child
.options
& (WSTOPPED
|WCONTINUED
));
2123 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|(s
->child
.options
& (WSTOPPED
|WCONTINUED
)));
2126 r
= source_set_pending(s
, true);
/* Reads queued signals from the signalfd of one priority bucket and marks the
 * matching signal event source as pending.
 *
 * e:      owning loop.
 * d:      per-priority signal state (fd and sigset are used here).
 * events: epoll event mask; anything other than exactly EPOLLIN is rejected with
 *         -EIO through assert_return.
 *
 * Steps visible here: if SIGCHLD is a member of d->sigset, e->need_process_child is
 * set so that children get rechecked; a signalfd_siginfo record is read from d->fd;
 * errno values EAGAIN and EINTR and a read length different from sizeof(si) are
 * singled out; the signal number is asserted to be below _NSIG and used to index
 * e->signal_sources when that table exists; the record is copied into
 * s->signal.siginfo and source_set_pending(s, true) is called.
 * NOTE(review): the loop construct, the use of read_one, the check described by the
 * second comment (an already pending source for this priority), the handling of a
 * NULL s, and all return statements are not visible in this excerpt. */
2135 static int process_signal(sd_event
*e
, struct signal_data
*d
, uint32_t events
) {
2136 bool read_one
= false;
2140 assert_return(events
== EPOLLIN
, -EIO
);
2142 /* If there's a signal queued on this priority and SIGCHLD is
2143 on this priority too, then make sure to recheck the
2144 children we watch. This is because we only ever dequeue
2145 the first signal per priority, and if we dequeue one, and
2146 SIGCHLD might be enqueued later we wouldn't know, but we
2147 might have higher priority children we care about hence we
2148 need to check that explicitly. */
2150 if (sigismember(&d
->sigset
, SIGCHLD
))
2151 e
->need_process_child
= true;
2153 /* If there's already an event source pending for this
2154 * priority we don't read another */
2159 struct signalfd_siginfo si
;
2161 sd_event_source
*s
= NULL
;
2163 n
= read(d
->fd
, &si
, sizeof(si
));
2165 if (errno
== EAGAIN
|| errno
== EINTR
)
2171 if (_unlikely_(n
!= sizeof(si
)))
2174 assert(si
.ssi_signo
< _NSIG
);
2178 if (e
->signal_sources
)
2179 s
= e
->signal_sources
[si
.ssi_signo
];
2185 s
->signal
.siginfo
= si
;
2188 r
= source_set_pending(s
, true);
/* Invokes the user callback of one event source and applies the bookkeeping around it.
 *
 * s: event source to dispatch; must be pending unless it is of type SOURCE_EXIT
 *    (assert).
 *
 * Steps visible here:
 *   - For every type except SOURCE_DEFER and SOURCE_EXIT, source_set_pending(s, false)
 *     is called before the callback runs.
 *   - When s is not a SOURCE_POST source, every post source of the loop that is not
 *     SD_EVENT_OFF gets source_set_pending(z, true).
 *   - A source enabled as SD_EVENT_ONESHOT is switched to SD_EVENT_OFF before its
 *     callback runs.
 *   - s->dispatching is set around the callback invocation and cleared afterwards.
 *   - The callback is chosen by type: io, time (all five clock types share one case),
 *     signal, child, defer, post, exit. SOURCE_WATCHDOG and the two sentinel enum
 *     values are treated as unreachable.
 *   - For child sources, zombie is derived from si_code before the callback, and a
 *     waitid with WNOHANG|WEXITED follows the callback to reap the process.
 *   - Two log_debug_errno calls report a failing callback, naming the source by its
 *     description or by pointer, and the source is then set to SD_EVENT_OFF.
 * NOTE(review): declarations, the switch header and several case labels, the
 * conditions around the zombie reap and around the logging/disable step, the checks
 * on r, and all return statements are not visible in this excerpt. */
2196 static int source_dispatch(sd_event_source
*s
) {
2200 assert(s
->pending
|| s
->type
== SOURCE_EXIT
);
2202 if (s
->type
!= SOURCE_DEFER
&& s
->type
!= SOURCE_EXIT
) {
2203 r
= source_set_pending(s
, false);
2208 if (s
->type
!= SOURCE_POST
) {
2212 /* If we execute a non-post source, let's mark all
2213 * post sources as pending */
2215 SET_FOREACH(z
, s
->event
->post_sources
, i
) {
2216 if (z
->enabled
== SD_EVENT_OFF
)
2219 r
= source_set_pending(z
, true);
2225 if (s
->enabled
== SD_EVENT_ONESHOT
) {
2226 r
= sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2231 s
->dispatching
= true;
2236 r
= s
->io
.callback(s
, s
->io
.fd
, s
->io
.revents
, s
->userdata
);
2239 case SOURCE_TIME_REALTIME
:
2240 case SOURCE_TIME_BOOTTIME
:
2241 case SOURCE_TIME_MONOTONIC
:
2242 case SOURCE_TIME_REALTIME_ALARM
:
2243 case SOURCE_TIME_BOOTTIME_ALARM
:
2244 r
= s
->time
.callback(s
, s
->time
.next
, s
->userdata
);
2248 r
= s
->signal
.callback(s
, &s
->signal
.siginfo
, s
->userdata
);
2251 case SOURCE_CHILD
: {
2254 zombie
= s
->child
.siginfo
.si_code
== CLD_EXITED
||
2255 s
->child
.siginfo
.si_code
== CLD_KILLED
||
2256 s
->child
.siginfo
.si_code
== CLD_DUMPED
;
2258 r
= s
->child
.callback(s
, &s
->child
.siginfo
, s
->userdata
);
2260 /* Now, reap the PID for good. */
2262 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|WEXITED
);
2268 r
= s
->defer
.callback(s
, s
->userdata
);
2272 r
= s
->post
.callback(s
, s
->userdata
);
2276 r
= s
->exit
.callback(s
, s
->userdata
);
2279 case SOURCE_WATCHDOG
:
2280 case _SOURCE_EVENT_SOURCE_TYPE_MAX
:
2281 case _SOURCE_EVENT_SOURCE_TYPE_INVALID
:
2282 assert_not_reached("Wut? I shouldn't exist.");
2285 s
->dispatching
= false;
2289 log_debug_errno(r
, "Event source '%s' returned error, disabling: %m", s
->description
);
2291 log_debug_errno(r
, "Event source %p returned error, disabling: %m", s
);
2297 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
/* Runs the prepare callbacks of event sources queued in e->prepare.
 *
 * Steps visible here: peek the head s of the prepare queue; a test fires when the
 * queue is empty, when s was already prepared in the current iteration
 * (s->prepare_iteration == e->iteration), or when s is SD_EVENT_OFF (the resulting
 * action is not visible; presumably processing ends there - confirm); otherwise s
 * is stamped with the current iteration and reshuffled in the queue, its prepare
 * callback is invoked with s->dispatching set around the call, and a failing
 * callback is logged (by description or by pointer) before the source is set to
 * SD_EVENT_OFF.
 * NOTE(review): the loop construct, declarations, the checks on r, the condition
 * choosing between the two log calls, and the return statements are not visible in
 * this excerpt. */
2302 static int event_prepare(sd_event
*e
) {
2310 s
= prioq_peek(e
->prepare
);
2311 if (!s
|| s
->prepare_iteration
== e
->iteration
|| s
->enabled
== SD_EVENT_OFF
)
2314 s
->prepare_iteration
= e
->iteration
;
2315 r
= prioq_reshuffle(e
->prepare
, s
, &s
->prepare_index
);
2321 s
->dispatching
= true;
2322 r
= s
->prepare(s
, s
->userdata
);
2323 s
->dispatching
= false;
2327 log_debug_errno(r
, "Prepare callback of event source '%s' returned error, disabling: %m", s
->description
);
2329 log_debug_errno(r
, "Prepare callback of event source %p returned error, disabling: %m", s
);
2335 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
/* Dispatches the next exit event source, or finishes the loop when none is left.
 *
 * Steps visible here: peek the head p of e->exit; if the queue is empty or p is
 * SD_EVENT_OFF the loop state becomes SD_EVENT_FINISHED; otherwise the state is set
 * to SD_EVENT_EXITING, p is run through source_dispatch(), and the state is put back
 * to SD_EVENT_INITIAL.
 * NOTE(review): declarations, reference handling (if any), and the return statements
 * are not visible in this excerpt. */
2341 static int dispatch_exit(sd_event
*e
) {
2347 p
= prioq_peek(e
->exit
);
2348 if (!p
|| p
->enabled
== SD_EVENT_OFF
) {
2349 e
->state
= SD_EVENT_FINISHED
;
2355 e
->state
= SD_EVENT_EXITING
;
2357 r
= source_dispatch(p
);
2359 e
->state
= SD_EVENT_INITIAL
;
/* Looks up the event source at the head of the pending queue of the loop.
 *
 * Visible here: p is taken from prioq_peek(e->pending) and tested for
 * enabled == SD_EVENT_OFF.
 * NOTE(review): the NULL check on p and the values returned from each branch are not
 * visible in this excerpt. Callers in this file treat a non-NULL result as meaning
 * that something is ready to dispatch. */
2365 static sd_event_source
* event_next_pending(sd_event
*e
) {
2370 p
= prioq_peek(e
->pending
);
2374 if (p
->enabled
== SD_EVENT_OFF
)
/* Programs the watchdog timerfd for the next keep-alive wake-up.
 *
 * Requires e->watchdog_fd >= 0 (assert). The wake-up time t is chosen by
 * sleep_between() inside the window that starts half a watchdog period and ends three
 * quarters of a watchdog period after e->watchdog_last. t is converted with
 * timespec_store() and installed via timerfd_settime() using TFD_TIMER_ABSTIME.
 * An all-zero it_value is bumped to 1 ns, for the reason given in the comment below.
 * NOTE(review): declarations of t and r, the check on r, and the return statements
 * are not visible in this excerpt. */
2380 static int arm_watchdog(sd_event
*e
) {
2381 struct itimerspec its
= {};
2386 assert(e
->watchdog_fd
>= 0);
2388 t
= sleep_between(e
,
2389 e
->watchdog_last
+ (e
->watchdog_period
/ 2),
2390 e
->watchdog_last
+ (e
->watchdog_period
* 3 / 4));
2392 timespec_store(&its
.it_value
, t
);
2394 /* Make sure we never set the watchdog to 0, which tells the
2395 * kernel to disable it. */
2396 if (its
.it_value
.tv_sec
== 0 && its
.it_value
.tv_nsec
== 0)
2397 its
.it_value
.tv_nsec
= 1;
2399 r
= timerfd_settime(e
->watchdog_fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
/* Sends a watchdog keep-alive notification when enough time has passed, then re-arms
 * the watchdog timer.
 *
 * Visible here: a rate limit compares e->watchdog_last plus a quarter of the watchdog
 * period against the cached monotonic timestamp of the loop (the action taken when
 * the limit hits is not visible; presumably an early return - confirm). Otherwise
 * sd_notify() is called with the WATCHDOG=1 message, e->watchdog_last is updated to
 * the cached monotonic timestamp, and the result of arm_watchdog(e) is returned.
 * NOTE(review): any check on whether the watchdog is enabled is not visible in this
 * excerpt. */
2406 static int process_watchdog(sd_event
*e
) {
2412 /* Don't notify watchdog too often */
2413 if (e
->watchdog_last
+ e
->watchdog_period
/ 4 > e
->timestamp
.monotonic
)
2416 sd_notify(false, "WATCHDOG=1");
2417 e
->watchdog_last
= e
->timestamp
.monotonic
;
2419 return arm_watchdog(e
);
/* First phase of a loop iteration: runs prepare callbacks and arms all clock timers.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); event_pid_changed() is false (-ECHILD); the loop is not in
 *   SD_EVENT_FINISHED (-ESTALE); the loop is in SD_EVENT_INITIAL (-EBUSY).
 *
 * Steps visible here: a test on e->exit_requested (its action is not visible);
 * event_prepare(e); event_arm_timer() for the realtime, boottime, monotonic,
 * realtime_alarm and boottime_alarm clocks in that order; a test on whether a source
 * is already pending or children need processing; the state is set to SD_EVENT_ARMED
 * on every visible path, and on one of them sd_event_wait(e, 0) is invoked to poll
 * without blocking.
 * NOTE(review): declarations, the checks on r, the labels or branches separating the
 * three state assignments, and all return statements are not visible here. */
2422 _public_
int sd_event_prepare(sd_event
*e
) {
2425 assert_return(e
, -EINVAL
);
2426 assert_return(!event_pid_changed(e
), -ECHILD
);
2427 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2428 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2430 if (e
->exit_requested
)
2435 r
= event_prepare(e
);
2439 r
= event_arm_timer(e
, &e
->realtime
);
2443 r
= event_arm_timer(e
, &e
->boottime
);
2447 r
= event_arm_timer(e
, &e
->monotonic
);
2451 r
= event_arm_timer(e
, &e
->realtime_alarm
);
2455 r
= event_arm_timer(e
, &e
->boottime_alarm
);
2459 if (event_next_pending(e
) || e
->need_process_child
)
2462 e
->state
= SD_EVENT_ARMED
;
2467 e
->state
= SD_EVENT_ARMED
;
2468 r
= sd_event_wait(e
, 0);
2470 e
->state
= SD_EVENT_ARMED
;
/* Second phase of a loop iteration: waits on the epoll fd and turns the wake-ups into
 * pending event sources.
 *
 * e:       loop; must be in SD_EVENT_ARMED.
 * timeout: maximum wait in microseconds; (uint64_t) -1 is passed to epoll_wait as -1.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); event_pid_changed() is false (-ECHILD); the loop is not in
 *   SD_EVENT_FINISHED (-ESTALE); the loop is in SD_EVENT_ARMED (-EBUSY).
 *
 * Steps visible here:
 *   - With exit_requested set, the state goes straight to SD_EVENT_PENDING.
 *   - The epoll result buffer is sized to max(n_sources, 1) entries and obtained
 *     through newa().
 *   - An EINTR from epoll_wait also moves the state to SD_EVENT_PENDING.
 *   - After the wait the cached timestamps of the loop are refreshed.
 *   - Each returned entry is routed by its data pointer: the watchdog marker goes to
 *     flush_timer() on the watchdog fd; otherwise the WakeupType found at the start
 *     of the pointed-to object selects process_io(), flush_timer() on a clock fd, or
 *     process_signal().
 *   - Then process_watchdog(), process_timer() for all five clocks, and (when
 *     need_process_child is set) process_child() run.
 *   - The state ends up SD_EVENT_PENDING when event_next_pending() finds a source,
 *     and SD_EVENT_INITIAL on the other visible path.
 * NOTE(review): declarations of r, m and i, the switch header, the checks on r and m,
 * and all return statements are not visible in this excerpt. */
2475 _public_
int sd_event_wait(sd_event
*e
, uint64_t timeout
) {
2476 struct epoll_event
*ev_queue
;
2477 unsigned ev_queue_max
;
2480 assert_return(e
, -EINVAL
);
2481 assert_return(!event_pid_changed(e
), -ECHILD
);
2482 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2483 assert_return(e
->state
== SD_EVENT_ARMED
, -EBUSY
);
2485 if (e
->exit_requested
) {
2486 e
->state
= SD_EVENT_PENDING
;
2490 ev_queue_max
= MAX(e
->n_sources
, 1u);
2491 ev_queue
= newa(struct epoll_event
, ev_queue_max
);
/* NOTE(review): the timeout is rounded up to whole milliseconds and then cast to int.
 * A value above INT_MAX milliseconds does not fit into the int, and
 * timeout + USEC_PER_MSEC - 1 can wrap for inputs just below (uint64_t) -1. Confirm
 * that callers never pass such values, or clamp before the cast. */
2493 m
= epoll_wait(e
->epoll_fd
, ev_queue
, ev_queue_max
,
2494 timeout
== (uint64_t) -1 ? -1 : (int) ((timeout
+ USEC_PER_MSEC
- 1) / USEC_PER_MSEC
));
2496 if (errno
== EINTR
) {
2497 e
->state
= SD_EVENT_PENDING
;
2505 dual_timestamp_get(&e
->timestamp
);
2506 e
->timestamp_boottime
= now(CLOCK_BOOTTIME
);
2508 for (i
= 0; i
< m
; i
++) {
2510 if (ev_queue
[i
].data
.ptr
== INT_TO_PTR(SOURCE_WATCHDOG
))
2511 r
= flush_timer(e
, e
->watchdog_fd
, ev_queue
[i
].events
, NULL
);
2513 WakeupType
*t
= ev_queue
[i
].data
.ptr
;
2517 case WAKEUP_EVENT_SOURCE
:
2518 r
= process_io(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
2521 case WAKEUP_CLOCK_DATA
: {
2522 struct clock_data
*d
= ev_queue
[i
].data
.ptr
;
2523 r
= flush_timer(e
, d
->fd
, ev_queue
[i
].events
, &d
->next
);
2527 case WAKEUP_SIGNAL_DATA
:
2528 r
= process_signal(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
2532 assert_not_reached("Invalid wake-up pointer");
2539 r
= process_watchdog(e
);
2543 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime
);
2547 r
= process_timer(e
, e
->timestamp_boottime
, &e
->boottime
);
2551 r
= process_timer(e
, e
->timestamp
.monotonic
, &e
->monotonic
);
2555 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime_alarm
);
2559 r
= process_timer(e
, e
->timestamp_boottime
, &e
->boottime_alarm
);
2563 if (e
->need_process_child
) {
2564 r
= process_child(e
);
2569 if (event_next_pending(e
)) {
2570 e
->state
= SD_EVENT_PENDING
;
2578 e
->state
= SD_EVENT_INITIAL
;
/* Third phase of a loop iteration: dispatches one pending event source.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); event_pid_changed() is false (-ECHILD); the loop is not in
 *   SD_EVENT_FINISHED (-ESTALE); the loop is in SD_EVENT_PENDING (-EBUSY).
 *
 * Steps visible here: with exit_requested set the call is delegated to
 * dispatch_exit(e); otherwise the next pending source p is fetched, the state is set
 * to SD_EVENT_RUNNING around source_dispatch(p), and the state returns to
 * SD_EVENT_INITIAL afterwards. A second assignment of SD_EVENT_INITIAL is visible on
 * another path (presumably the case where no source is pending - confirm).
 * NOTE(review): declarations, the NULL test on p, reference handling (if any), and
 * the return statements are not visible in this excerpt. */
2583 _public_
int sd_event_dispatch(sd_event
*e
) {
2587 assert_return(e
, -EINVAL
);
2588 assert_return(!event_pid_changed(e
), -ECHILD
);
2589 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2590 assert_return(e
->state
== SD_EVENT_PENDING
, -EBUSY
);
2592 if (e
->exit_requested
)
2593 return dispatch_exit(e
);
2595 p
= event_next_pending(e
);
2599 e
->state
= SD_EVENT_RUNNING
;
2600 r
= source_dispatch(p
);
2601 e
->state
= SD_EVENT_INITIAL
;
2608 e
->state
= SD_EVENT_INITIAL
;
/* Runs a single loop iteration by chaining prepare, wait and dispatch.
 *
 * e:       loop; must be in SD_EVENT_INITIAL.
 * timeout: handed unchanged to sd_event_wait().
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); event_pid_changed() is false (-ECHILD); the loop is not in
 *   SD_EVENT_FINISHED (-ESTALE); the loop is in SD_EVENT_INITIAL (-EBUSY).
 *
 * Per the comments in the body, sd_event_wait() is used when sd_event_prepare() found
 * nothing, and sd_event_dispatch() runs once something is available.
 * NOTE(review): the conditions on r that connect the three calls, and the return
 * statements, are not visible in this excerpt. */
2613 _public_
int sd_event_run(sd_event
*e
, uint64_t timeout
) {
2616 assert_return(e
, -EINVAL
);
2617 assert_return(!event_pid_changed(e
), -ECHILD
);
2618 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2619 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2621 r
= sd_event_prepare(e
);
2623 /* There was nothing? Then wait... */
2624 r
= sd_event_wait(e
, timeout
);
2627 /* There's something now, then let's dispatch it */
2628 r
= sd_event_dispatch(e
);
/* Runs loop iterations until the loop reaches SD_EVENT_FINISHED.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); event_pid_changed() is false (-ECHILD); the loop is in
 *   SD_EVENT_INITIAL (-EBUSY).
 *
 * Each round calls sd_event_run() with an infinite timeout ((uint64_t) -1).
 * NOTE(review): reference handling around the loop, the check on r, and the value
 * returned at the end are not visible in this excerpt. */
2638 _public_
int sd_event_loop(sd_event
*e
) {
2641 assert_return(e
, -EINVAL
);
2642 assert_return(!event_pid_changed(e
), -ECHILD
);
2643 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2647 while (e
->state
!= SD_EVENT_FINISHED
) {
2648 r
= sd_event_run(e
, (uint64_t) -1);
/* Accessor for a file descriptor of the loop.
 *
 * Precondition checks: e non-NULL (-EINVAL); event_pid_changed() is false (-ECHILD).
 * NOTE(review): the return statement is not visible in this excerpt; presumably the
 * epoll fd of the loop is returned - confirm. */
2660 _public_
int sd_event_get_fd(sd_event
*e
) {
2662 assert_return(e
, -EINVAL
);
2663 assert_return(!event_pid_changed(e
), -ECHILD
);
/* Accessor for the current state of the loop.
 *
 * Precondition checks: e non-NULL (-EINVAL); event_pid_changed() is false (-ECHILD).
 * NOTE(review): the return statement is not visible in this excerpt; presumably
 * e->state is returned - confirm. */
2668 _public_
int sd_event_get_state(sd_event
*e
) {
2669 assert_return(e
, -EINVAL
);
2670 assert_return(!event_pid_changed(e
), -ECHILD
);
/* Reports the exit code recorded on the loop.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); code non-NULL (-EINVAL); event_pid_changed() is false
 *   (-ECHILD).
 *
 * When no exit has been requested a separate branch is taken; its body is not visible
 * in this excerpt (presumably an error return - confirm). Otherwise *code receives
 * e->exit_code.
 * NOTE(review): the return statements are not visible in this excerpt. */
2675 _public_
int sd_event_get_exit_code(sd_event
*e
, int *code
) {
2676 assert_return(e
, -EINVAL
);
2677 assert_return(code
, -EINVAL
);
2678 assert_return(!event_pid_changed(e
), -ECHILD
);
2680 if (!e
->exit_requested
)
2683 *code
= e
->exit_code
;
/* Requests termination of the loop with the given exit code.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); the loop is not in SD_EVENT_FINISHED (-ESTALE);
 *   event_pid_changed() is false (-ECHILD).
 *
 * Effects: sets e->exit_requested and stores code in e->exit_code. Other functions in
 * this file test exit_requested to switch over to exit-source dispatching.
 * NOTE(review): the return statement is not visible in this excerpt. */
2687 _public_
int sd_event_exit(sd_event
*e
, int code
) {
2688 assert_return(e
, -EINVAL
);
2689 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2690 assert_return(!event_pid_changed(e
), -ECHILD
);
2692 e
->exit_requested
= true;
2693 e
->exit_code
= code
;
/* Reports the timestamp cached by the loop for the requested clock.
 *
 * e:     loop to query.
 * clock: clock id selecting which cached timestamp is reported.
 * usec:  output location.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); usec non-NULL (-EINVAL); event_pid_changed() is false
 *   (-ECHILD).
 *
 * Clock mapping visible here:
 *   CLOCK_REALTIME, CLOCK_REALTIME_ALARM -> e->timestamp.realtime
 *   CLOCK_MONOTONIC                      -> e->timestamp.monotonic
 *   CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM -> e->timestamp_boottime
 * When the cached timestamp was never set, a fallback to now() applies per the
 * comment in the body.
 * NOTE(review): the fallback statements, the switch header, the handling of other
 * clock ids, and the return statements are not visible in this excerpt. */
2698 _public_
int sd_event_now(sd_event
*e
, clockid_t clock
, uint64_t *usec
) {
2699 assert_return(e
, -EINVAL
);
2700 assert_return(usec
, -EINVAL
);
2701 assert_return(!event_pid_changed(e
), -ECHILD
);
2703 if (!dual_timestamp_is_set(&e
->timestamp
)) {
2704 /* Implicitly fall back to now() if we never ran
2705 * before and thus have no cached time. */
2712 case CLOCK_REALTIME
:
2713 case CLOCK_REALTIME_ALARM
:
2714 *usec
= e
->timestamp
.realtime
;
2717 case CLOCK_MONOTONIC
:
2718 *usec
= e
->timestamp
.monotonic
;
2721 case CLOCK_BOOTTIME
:
2722 case CLOCK_BOOTTIME_ALARM
:
2723 *usec
= e
->timestamp_boottime
;
/* Hands out the default event loop kept in a thread_local variable, creating it on
 * first use.
 *
 * ret: output location for the loop reference.
 *
 * Steps visible here: one path returns !!default_event, i.e. 1 when a default loop
 * already exists and 0 otherwise (the guarding condition is not visible; presumably
 * ret == NULL - confirm); if a default loop exists, *ret receives a new reference
 * from sd_event_ref(); otherwise a loop is created with sd_event_new() and its
 * default_event_ptr is pointed back at the thread_local slot.
 * NOTE(review): the declaration of e and r, the check on r, the assignments to
 * default_event and *ret for a freshly created loop, and the return statements are
 * not visible in this excerpt. */
2730 _public_
int sd_event_default(sd_event
**ret
) {
2732 static thread_local sd_event
*default_event
= NULL
;
2737 return !!default_event
;
2739 if (default_event
) {
2740 *ret
= sd_event_ref(default_event
);
2744 r
= sd_event_new(&e
);
2748 e
->default_event_ptr
= &default_event
;
/* Reports a thread id associated with the loop through *tid.
 *
 * Precondition checks (each paired with the error code handed to assert_return):
 *   e non-NULL (-EINVAL); tid non-NULL (-EINVAL); event_pid_changed() is false
 *   (-ECHILD).
 * NOTE(review): the statements that fill *tid and the return statements are not
 * visible in this excerpt, so the exact semantics cannot be stated from here. */
2756 _public_
int sd_event_get_tid(sd_event
*e
, pid_t
*tid
) {
2757 assert_return(e
, -EINVAL
);
2758 assert_return(tid
, -EINVAL
);
2759 assert_return(!event_pid_changed(e
), -ECHILD
);
/* Turns automatic watchdog keep-alive handling of the loop on or off.
 *
 * e: loop to configure.
 * b: treated as a boolean (compared through !!b).
 *
 * Precondition checks: e non-NULL (-EINVAL); event_pid_changed() is false (-ECHILD).
 *
 * Steps visible here:
 *   - A request that matches the current e->watchdog setting is detected up front
 *     (the action taken is not visible; presumably an early return - confirm).
 *   - Enabling: sd_watchdog_enabled() fills e->watchdog_period; a first WATCHDOG=1
 *     notification is sent right away and e->watchdog_last is set to the current
 *     monotonic time; a non-blocking, close-on-exec timerfd on CLOCK_MONOTONIC is
 *     created and armed through arm_watchdog(); the fd is registered with the epoll
 *     fd for EPOLLIN, tagged with the SOURCE_WATCHDOG marker as its data pointer.
 *   - Disabling: an open watchdog fd is removed from the epoll set and closed via
 *     safe_close().
 *   - A further safe_close() of the watchdog fd is visible at the end (presumably the
 *     failure cleanup path of the enabling branch - confirm).
 * NOTE(review): declarations, the branch header on b, the checks on r, the update of
 * e->watchdog, and all return statements are not visible in this excerpt. */
2769 _public_
int sd_event_set_watchdog(sd_event
*e
, int b
) {
2772 assert_return(e
, -EINVAL
);
2773 assert_return(!event_pid_changed(e
), -ECHILD
);
2775 if (e
->watchdog
== !!b
)
2779 struct epoll_event ev
= {};
2781 r
= sd_watchdog_enabled(false, &e
->watchdog_period
);
2785 /* Issue first ping immediately */
2786 sd_notify(false, "WATCHDOG=1");
2787 e
->watchdog_last
= now(CLOCK_MONOTONIC
);
2789 e
->watchdog_fd
= timerfd_create(CLOCK_MONOTONIC
, TFD_NONBLOCK
|TFD_CLOEXEC
);
2790 if (e
->watchdog_fd
< 0)
2793 r
= arm_watchdog(e
);
2797 ev
.events
= EPOLLIN
;
2798 ev
.data
.ptr
= INT_TO_PTR(SOURCE_WATCHDOG
);
2800 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, e
->watchdog_fd
, &ev
);
2807 if (e
->watchdog_fd
>= 0) {
2808 epoll_ctl(e
->epoll_fd
, EPOLL_CTL_DEL
, e
->watchdog_fd
, NULL
);
2809 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
2817 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
2821 _public_
int sd_event_get_watchdog(sd_event
*e
) {
2822 assert_return(e
, -EINVAL
);
2823 assert_return(!event_pid_changed(e
), -ECHILD
);