/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/epoll.h>
#include <sys/timerfd.h>

#include "sd-daemon.h"
#include "time-util.h"
#include "signal-util.h"
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        unsigned pending_iteration;
        unsigned prepare_iteration;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
        };
};
struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid;

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        int exit_code;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);
};
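/* Editor's illustration (not part of the original sources): with two
 * sources on the same clock, A with next=1000ms/accuracy=250ms and B
 * with next=1100ms/accuracy=50ms, the earliest prioq peeks at A
 * (1000ms) and the latest prioq peeks at B (1100+50 = 1150ms). Any
 * wakeup scheduled inside the window [1000ms, 1150ms] meets every
 * deadline, and a wakeup at e.g. 1150ms dispatches both sources at
 * once instead of waking the CPU twice. */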
static void source_disconnect(sd_event_source *s);
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
                return -1;
        if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
                return 1;

        return 0;
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY;

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
_public_ sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}
_public_ sd_event* sd_event_unref(sd_event *e) {

        if (!e)
                return NULL;

        assert(e->n_ref >= 1);
        e->n_ref--;

        if (e->n_ref <= 0)
                event_free(e);

        return NULL;
}
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s from epoll: %m", strna(s->description));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev = {};
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev.events = events;
        ev.data.ptr = s;

        if (enabled == SD_EVENT_ONESHOT)
                ev.events |= EPOLLONESHOT;

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev = {};
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = 0;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new0(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                d->wakeup = WAKEUP_SIGNAL_DATA;
                d->fd = -1;
                d->priority = priority;

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = r;

        ev.events = EPOLLIN;
        ev.data.ptr = d;

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added) {
                d->fd = safe_close(d->fd);
                hashmap_remove(e->signal_data, &d->priority);
                free(d);
        }

        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                safe_close(d->fd);
                free(d);
                return;
        }

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }
                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->description);
        free(s);
}
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new0(sd_event_source, 1);
        if (!s)
                return NULL;

        s->n_ref = 1;
        s->event = e;
        s->floating = floating;
        s->type = type;
        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
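/* Example (editor's sketch, not part of the original sources): typical
 * use of sd_event_add_io(); the handler name and the fd are
 * illustrative only.
 *
 *     static int on_socket(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[4096];
 *             ssize_t n;
 *
 *             n = read(fd, buf, sizeof(buf));
 *             if (n <= 0)  // EOF or error: leave the loop
 *                     return sd_event_exit(sd_event_source_get_event(s), 0);
 *
 *             return 0;
 *     }
 *
 *     sd_event *e;
 *     sd_event_source *src;
 *     assert_se(sd_event_default(&e) >= 0);
 *     assert_se(sd_event_add_io(e, &src, sock_fd, EPOLLIN, on_socket, NULL) >= 0);
 */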
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev = {};
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        ev.events = EPOLLIN;
        ev.data.ptr = d;

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -EOPNOTSUPP);

        d = event_get_clock_data(e, type);
        assert(d);

        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
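/* Example (editor's sketch): arm a one-shot timer 5s from now on
 * CLOCK_MONOTONIC. sd_event_add_time() takes an absolute time, so the
 * caller adds the delay to the current loop time; "on_timer" is a
 * hypothetical handler.
 *
 *     uint64_t now_usec;
 *     assert_se(sd_event_now(e, CLOCK_MONOTONIC, &now_usec) >= 0);
 *     assert_se(sd_event_add_time(e, &src, CLOCK_MONOTONIC,
 *                                 now_usec + 5 * USEC_PER_SEC,
 *                                 250 * USEC_PER_MSEC,  // accuracy: allow 250ms of coalescing
 *                                 on_timer, NULL) >= 0);
 */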
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(sig > 0, -EINVAL);
        assert_return(sig < _NSIG, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;

        return 0;
}
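/* Example (editor's sketch): sd_event_add_signal() insists, via the
 * sigismember() check above, that the signal is already blocked, since
 * delivery happens through a signalfd. A caller therefore blocks it
 * first:
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) >= 0);
 *     assert_se(sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL) >= 0);
 *
 * Passing a NULL callback falls back to signal_exit_callback(), so
 * SIGTERM then terminates the loop via sd_event_exit(). */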
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                source_free(s);
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
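/* Example (editor's sketch): watch a forked child for termination.
 * SIGCHLD must be blocked before the child is forked for this to be
 * race-free, an assumption of this sketch that matches the signalfd
 * machinery above.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_pid, si->si_code and si->si_status describe the state change
 *             return 0;
 *     }
 *
 *     assert_se(sd_event_add_child(e, &src, pid, WEXITED, on_child, NULL) >= 0);
 */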
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit) {
                e->exit = prioq_new(exit_prioq_compare);
                if (!e->exit)
                        return -ENOMEM;
        }

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        source_set_pending(s, false);

        return 0;
}
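/* Example (editor's sketch): a common pattern is to watch EPOLLOUT only
 * while a write queue is non-empty, to avoid busy wakeups on an
 * always-writable socket; "have_data" is hypothetical caller state.
 *
 *     uint32_t ev = EPOLLIN | (have_data ? EPOLLOUT : 0);
 *     assert_se(sd_event_source_set_io_events(src, ev) >= 0);
 */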
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
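/* Example (editor's sketch): time sources default to SD_EVENT_ONESHOT,
 * so a periodic timer is built by re-arming from inside the callback;
 * "USEC_INTERVAL" is a hypothetical constant.
 *
 *     static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
 *             sd_event_source_set_time(s, usec + USEC_INTERVAL);
 *             sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *             return 0;
 *     }
 */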
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
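/* Worked example (editor's note): suppose e->perturb is 17s worth of
 * usec and the allowed window is a = 8min + 2s, b = 8min + 40s. The
 * minute-granularity candidate is
 * c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb = 8min + 17s,
 * which lies inside [a, b] and is returned: every loop on this machine
 * whose window admits it wakes at second 17 of the minute. Only when a
 * window is too narrow for the minute grid do the 10s, 1s and 250ms
 * grids get a chance. */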
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return r;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (errno == EAGAIN || errno == EINTR)
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
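/* Editor's note, with a minimal sketch of the WNOWAIT behaviour relied
 * on above: waitid() with WNOWAIT fills in the siginfo_t but leaves the
 * child in its waitable state, so a later waitid() without WNOWAIT (as
 * done in source_dispatch()) still succeeds and reaps it.
 *
 *     siginfo_t si = {};
 *     assert_se(waitid(P_PID, pid, &si, WEXITED|WNOHANG|WNOWAIT) == 0);  // peek
 *     assert_se(waitid(P_PID, pid, &si, WEXITED|WNOHANG) == 0);          // reap
 */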
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one and
           SIGCHLD is enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_INITIAL;
        sd_event_unref(e);

        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;

                sd_event_unref(e);

                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        goto finish;
        }

        r = e->exit_code;

finish:
        sd_event_unref(e);
        return r;
}
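/* Example (editor's sketch): the simplest complete consumer of this
 * API, using only the public calls defined in this file.
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_default(&e);
 *             if (r < 0)
 *                     return EXIT_FAILURE;
 *
 *             // ... add sources with sd_event_add_io()/_add_time()/... here ...
 *
 *             r = sd_event_loop(e);   // runs until sd_event_exit() is called
 *             sd_event_unref(e);
 *             return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
 *     }
 *
 * A program embedding this loop into a foreign loop instead drives the
 * sd_event_prepare() -> sd_event_wait() -> sd_event_dispatch() state
 * machine itself, polling on sd_event_get_fd(). */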
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!dual_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        switch (clock) {

        case CLOCK_REALTIME:
        case CLOCK_REALTIME_ALARM:
                *usec = e->timestamp.realtime;
                break;

        case CLOCK_MONOTONIC:
                *usec = e->timestamp.monotonic;
                break;

        case CLOCK_BOOTTIME:
        case CLOCK_BOOTTIME_ALARM:
                *usec = e->timestamp_boottime;
                break;
        }

        return 0;
}
_public_ int sd_event_default(sd_event **ret) {

        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        default_event = e;

        *ret = e;
        return 0;
}
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
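/* Example (editor's sketch): under a service manager that sets
 * WATCHDOG_USEC (queried via sd_watchdog_enabled() above), enabling the
 * loop's watchdog support is a single call; the loop then pings the
 * manager from process_watchdog() at intervals derived from the
 * configured period.
 *
 *     assert_se(sd_event_set_watchdog(e, true) >= 0);
 */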
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}