1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
27 #include "sd-daemon.h"
32 #include "time-util.h"
36 #include "signal-util.h"
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
42 typedef enum EventSourceType
{
46 SOURCE_TIME_MONOTONIC
,
47 SOURCE_TIME_REALTIME_ALARM
,
48 SOURCE_TIME_BOOTTIME_ALARM
,
55 _SOURCE_EVENT_SOURCE_TYPE_MAX
,
56 _SOURCE_EVENT_SOURCE_TYPE_INVALID
= -1
59 /* All objects we use in epoll events start with this value, so that
60 * we know how to dispatch it */
61 typedef enum WakeupType
{
67 _WAKEUP_TYPE_INVALID
= -1,
70 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
72 struct sd_event_source
{
79 sd_event_handler_t prepare
;
83 EventSourceType type
:5;
90 unsigned pending_index
;
91 unsigned prepare_index
;
92 unsigned pending_iteration
;
93 unsigned prepare_iteration
;
95 LIST_FIELDS(sd_event_source
, sources
);
99 sd_event_io_handler_t callback
;
106 sd_event_time_handler_t callback
;
107 usec_t next
, accuracy
;
108 unsigned earliest_index
;
109 unsigned latest_index
;
112 sd_event_signal_handler_t callback
;
113 struct signalfd_siginfo siginfo
;
117 sd_event_child_handler_t callback
;
123 sd_event_handler_t callback
;
126 sd_event_handler_t callback
;
129 sd_event_handler_t callback
;
130 unsigned prioq_index
;
139 /* For all clocks we maintain two priority queues each, one
140 * ordered for the earliest times the events may be
141 * dispatched, and one ordered by the latest times they must
142 * have been dispatched. The range between the top entries in
143 * the two prioqs is the time window we can freely schedule
156 /* For each priority we maintain one signal fd, so that we
157 * only have to dequeue a single event per priority at a
163 sd_event_source
*current
;
175 /* timerfd_create() only supports these five clocks so far. We
176 * can add support for more clocks when the kernel learns to
177 * deal with them, too. */
178 struct clock_data realtime
;
179 struct clock_data boottime
;
180 struct clock_data monotonic
;
181 struct clock_data realtime_alarm
;
182 struct clock_data boottime_alarm
;
186 sd_event_source
**signal_sources
; /* indexed by signal number */
187 Hashmap
*signal_data
; /* indexed by priority */
189 Hashmap
*child_sources
;
190 unsigned n_enabled_child_sources
;
199 dual_timestamp timestamp
;
200 usec_t timestamp_boottime
;
203 bool exit_requested
:1;
204 bool need_process_child
:1;
210 sd_event
**default_event_ptr
;
212 usec_t watchdog_last
, watchdog_period
;
216 LIST_HEAD(sd_event_source
, sources
);
219 static void source_disconnect(sd_event_source
*s
);
221 static int pending_prioq_compare(const void *a
, const void *b
) {
222 const sd_event_source
*x
= a
, *y
= b
;
227 /* Enabled ones first */
228 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
230 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
233 /* Lower priority values first */
234 if (x
->priority
< y
->priority
)
236 if (x
->priority
> y
->priority
)
239 /* Older entries first */
240 if (x
->pending_iteration
< y
->pending_iteration
)
242 if (x
->pending_iteration
> y
->pending_iteration
)
245 /* Stability for the rest */
254 static int prepare_prioq_compare(const void *a
, const void *b
) {
255 const sd_event_source
*x
= a
, *y
= b
;
260 /* Move most recently prepared ones last, so that we can stop
261 * preparing as soon as we hit one that has already been
262 * prepared in the current iteration */
263 if (x
->prepare_iteration
< y
->prepare_iteration
)
265 if (x
->prepare_iteration
> y
->prepare_iteration
)
268 /* Enabled ones first */
269 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
271 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
274 /* Lower priority values first */
275 if (x
->priority
< y
->priority
)
277 if (x
->priority
> y
->priority
)
280 /* Stability for the rest */
289 static int earliest_time_prioq_compare(const void *a
, const void *b
) {
290 const sd_event_source
*x
= a
, *y
= b
;
292 assert(EVENT_SOURCE_IS_TIME(x
->type
));
293 assert(x
->type
== y
->type
);
295 /* Enabled ones first */
296 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
298 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
301 /* Move the pending ones to the end */
302 if (!x
->pending
&& y
->pending
)
304 if (x
->pending
&& !y
->pending
)
308 if (x
->time
.next
< y
->time
.next
)
310 if (x
->time
.next
> y
->time
.next
)
313 /* Stability for the rest */
322 static int latest_time_prioq_compare(const void *a
, const void *b
) {
323 const sd_event_source
*x
= a
, *y
= b
;
325 assert(EVENT_SOURCE_IS_TIME(x
->type
));
326 assert(x
->type
== y
->type
);
328 /* Enabled ones first */
329 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
331 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
334 /* Move the pending ones to the end */
335 if (!x
->pending
&& y
->pending
)
337 if (x
->pending
&& !y
->pending
)
341 if (x
->time
.next
+ x
->time
.accuracy
< y
->time
.next
+ y
->time
.accuracy
)
343 if (x
->time
.next
+ x
->time
.accuracy
> y
->time
.next
+ y
->time
.accuracy
)
346 /* Stability for the rest */
355 static int exit_prioq_compare(const void *a
, const void *b
) {
356 const sd_event_source
*x
= a
, *y
= b
;
358 assert(x
->type
== SOURCE_EXIT
);
359 assert(y
->type
== SOURCE_EXIT
);
361 /* Enabled ones first */
362 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
364 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
367 /* Lower priority values first */
368 if (x
->priority
< y
->priority
)
370 if (x
->priority
> y
->priority
)
373 /* Stability for the rest */
382 static void free_clock_data(struct clock_data
*d
) {
384 assert(d
->wakeup
== WAKEUP_CLOCK_DATA
);
387 prioq_free(d
->earliest
);
388 prioq_free(d
->latest
);
391 static void event_free(sd_event
*e
) {
396 while ((s
= e
->sources
)) {
398 source_disconnect(s
);
399 sd_event_source_unref(s
);
402 assert(e
->n_sources
== 0);
404 if (e
->default_event_ptr
)
405 *(e
->default_event_ptr
) = NULL
;
407 safe_close(e
->epoll_fd
);
408 safe_close(e
->watchdog_fd
);
410 free_clock_data(&e
->realtime
);
411 free_clock_data(&e
->boottime
);
412 free_clock_data(&e
->monotonic
);
413 free_clock_data(&e
->realtime_alarm
);
414 free_clock_data(&e
->boottime_alarm
);
416 prioq_free(e
->pending
);
417 prioq_free(e
->prepare
);
420 free(e
->signal_sources
);
421 hashmap_free(e
->signal_data
);
423 hashmap_free(e
->child_sources
);
424 set_free(e
->post_sources
);
428 _public_
int sd_event_new(sd_event
** ret
) {
432 assert_return(ret
, -EINVAL
);
434 e
= new0(sd_event
, 1);
439 e
->watchdog_fd
= e
->epoll_fd
= e
->realtime
.fd
= e
->boottime
.fd
= e
->monotonic
.fd
= e
->realtime_alarm
.fd
= e
->boottime_alarm
.fd
= -1;
440 e
->realtime
.next
= e
->boottime
.next
= e
->monotonic
.next
= e
->realtime_alarm
.next
= e
->boottime_alarm
.next
= USEC_INFINITY
;
441 e
->realtime
.wakeup
= e
->boottime
.wakeup
= e
->monotonic
.wakeup
= e
->realtime_alarm
.wakeup
= e
->boottime_alarm
.wakeup
= WAKEUP_CLOCK_DATA
;
442 e
->original_pid
= getpid();
443 e
->perturb
= USEC_INFINITY
;
445 e
->pending
= prioq_new(pending_prioq_compare
);
451 e
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
452 if (e
->epoll_fd
< 0) {
465 _public_ sd_event
* sd_event_ref(sd_event
*e
) {
466 assert_return(e
, NULL
);
468 assert(e
->n_ref
>= 1);
474 _public_ sd_event
* sd_event_unref(sd_event
*e
) {
479 assert(e
->n_ref
>= 1);
488 static bool event_pid_changed(sd_event
*e
) {
491 /* We don't support people creating an event loop and keeping
492 * it around over a fork(). Let's complain. */
494 return e
->original_pid
!= getpid();
497 static void source_io_unregister(sd_event_source
*s
) {
501 assert(s
->type
== SOURCE_IO
);
503 if (event_pid_changed(s
->event
))
506 if (!s
->io
.registered
)
509 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, s
->io
.fd
, NULL
);
511 log_debug_errno(errno
, "Failed to remove source %s from epoll: %m", strna(s
->description
));
513 s
->io
.registered
= false;
516 static int source_io_register(
521 struct epoll_event ev
= {};
525 assert(s
->type
== SOURCE_IO
);
526 assert(enabled
!= SD_EVENT_OFF
);
531 if (enabled
== SD_EVENT_ONESHOT
)
532 ev
.events
|= EPOLLONESHOT
;
534 if (s
->io
.registered
)
535 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_MOD
, s
->io
.fd
, &ev
);
537 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_ADD
, s
->io
.fd
, &ev
);
541 s
->io
.registered
= true;
546 static clockid_t
event_source_type_to_clock(EventSourceType t
) {
550 case SOURCE_TIME_REALTIME
:
551 return CLOCK_REALTIME
;
553 case SOURCE_TIME_BOOTTIME
:
554 return CLOCK_BOOTTIME
;
556 case SOURCE_TIME_MONOTONIC
:
557 return CLOCK_MONOTONIC
;
559 case SOURCE_TIME_REALTIME_ALARM
:
560 return CLOCK_REALTIME_ALARM
;
562 case SOURCE_TIME_BOOTTIME_ALARM
:
563 return CLOCK_BOOTTIME_ALARM
;
566 return (clockid_t
) -1;
570 static EventSourceType
clock_to_event_source_type(clockid_t clock
) {
575 return SOURCE_TIME_REALTIME
;
578 return SOURCE_TIME_BOOTTIME
;
580 case CLOCK_MONOTONIC
:
581 return SOURCE_TIME_MONOTONIC
;
583 case CLOCK_REALTIME_ALARM
:
584 return SOURCE_TIME_REALTIME_ALARM
;
586 case CLOCK_BOOTTIME_ALARM
:
587 return SOURCE_TIME_BOOTTIME_ALARM
;
590 return _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
594 static struct clock_data
* event_get_clock_data(sd_event
*e
, EventSourceType t
) {
599 case SOURCE_TIME_REALTIME
:
602 case SOURCE_TIME_BOOTTIME
:
605 case SOURCE_TIME_MONOTONIC
:
606 return &e
->monotonic
;
608 case SOURCE_TIME_REALTIME_ALARM
:
609 return &e
->realtime_alarm
;
611 case SOURCE_TIME_BOOTTIME_ALARM
:
612 return &e
->boottime_alarm
;
619 static int event_make_signal_data(
622 struct signal_data
**ret
) {
624 struct epoll_event ev
= {};
625 struct signal_data
*d
;
633 if (event_pid_changed(e
))
636 if (e
->signal_sources
&& e
->signal_sources
[sig
])
637 priority
= e
->signal_sources
[sig
]->priority
;
641 d
= hashmap_get(e
->signal_data
, &priority
);
643 if (sigismember(&d
->sigset
, sig
) > 0) {
649 r
= hashmap_ensure_allocated(&e
->signal_data
, &uint64_hash_ops
);
653 d
= new0(struct signal_data
, 1);
657 d
->wakeup
= WAKEUP_SIGNAL_DATA
;
659 d
->priority
= priority
;
661 r
= hashmap_put(e
->signal_data
, &d
->priority
, d
);
669 assert_se(sigaddset(&ss_copy
, sig
) >= 0);
671 r
= signalfd(d
->fd
, &ss_copy
, SFD_NONBLOCK
|SFD_CLOEXEC
);
690 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, d
->fd
, &ev
);
703 d
->fd
= safe_close(d
->fd
);
704 hashmap_remove(e
->signal_data
, &d
->priority
);
711 static void event_unmask_signal_data(sd_event
*e
, struct signal_data
*d
, int sig
) {
715 /* Turns off the specified signal in the signal data
716 * object. If the signal mask of the object becomes empty that
719 if (sigismember(&d
->sigset
, sig
) == 0)
722 assert_se(sigdelset(&d
->sigset
, sig
) >= 0);
724 if (sigisemptyset(&d
->sigset
)) {
726 /* If all the mask is all-zero we can get rid of the structure */
727 hashmap_remove(e
->signal_data
, &d
->priority
);
736 if (signalfd(d
->fd
, &d
->sigset
, SFD_NONBLOCK
|SFD_CLOEXEC
) < 0)
737 log_debug_errno(errno
, "Failed to unset signal bit, ignoring: %m");
740 static void event_gc_signal_data(sd_event
*e
, const int64_t *priority
, int sig
) {
741 struct signal_data
*d
;
742 static const int64_t zero_priority
= 0;
746 /* Rechecks if the specified signal is still something we are
747 * interested in. If not, we'll unmask it, and possibly drop
748 * the signalfd for it. */
750 if (sig
== SIGCHLD
&&
751 e
->n_enabled_child_sources
> 0)
754 if (e
->signal_sources
&&
755 e
->signal_sources
[sig
] &&
756 e
->signal_sources
[sig
]->enabled
!= SD_EVENT_OFF
)
760 * The specified signal might be enabled in three different queues:
762 * 1) the one that belongs to the priority passed (if it is non-NULL)
763 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
764 * 3) the 0 priority (to cover the SIGCHLD case)
766 * Hence, let's remove it from all three here.
770 d
= hashmap_get(e
->signal_data
, priority
);
772 event_unmask_signal_data(e
, d
, sig
);
775 if (e
->signal_sources
&& e
->signal_sources
[sig
]) {
776 d
= hashmap_get(e
->signal_data
, &e
->signal_sources
[sig
]->priority
);
778 event_unmask_signal_data(e
, d
, sig
);
781 d
= hashmap_get(e
->signal_data
, &zero_priority
);
783 event_unmask_signal_data(e
, d
, sig
);
786 static void source_disconnect(sd_event_source
*s
) {
794 assert(s
->event
->n_sources
> 0);
800 source_io_unregister(s
);
804 case SOURCE_TIME_REALTIME
:
805 case SOURCE_TIME_BOOTTIME
:
806 case SOURCE_TIME_MONOTONIC
:
807 case SOURCE_TIME_REALTIME_ALARM
:
808 case SOURCE_TIME_BOOTTIME_ALARM
: {
809 struct clock_data
*d
;
811 d
= event_get_clock_data(s
->event
, s
->type
);
814 prioq_remove(d
->earliest
, s
, &s
->time
.earliest_index
);
815 prioq_remove(d
->latest
, s
, &s
->time
.latest_index
);
816 d
->needs_rearm
= true;
821 if (s
->signal
.sig
> 0) {
823 if (s
->event
->signal_sources
)
824 s
->event
->signal_sources
[s
->signal
.sig
] = NULL
;
826 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
832 if (s
->child
.pid
> 0) {
833 if (s
->enabled
!= SD_EVENT_OFF
) {
834 assert(s
->event
->n_enabled_child_sources
> 0);
835 s
->event
->n_enabled_child_sources
--;
838 (void) hashmap_remove(s
->event
->child_sources
, INT_TO_PTR(s
->child
.pid
));
839 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
849 set_remove(s
->event
->post_sources
, s
);
853 prioq_remove(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
857 assert_not_reached("Wut? I shouldn't exist.");
861 prioq_remove(s
->event
->pending
, s
, &s
->pending_index
);
864 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
868 s
->type
= _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
870 LIST_REMOVE(sources
, event
->sources
, s
);
874 sd_event_unref(event
);
877 static void source_free(sd_event_source
*s
) {
880 source_disconnect(s
);
881 free(s
->description
);
885 static int source_set_pending(sd_event_source
*s
, bool b
) {
889 assert(s
->type
!= SOURCE_EXIT
);
897 s
->pending_iteration
= s
->event
->iteration
;
899 r
= prioq_put(s
->event
->pending
, s
, &s
->pending_index
);
905 assert_se(prioq_remove(s
->event
->pending
, s
, &s
->pending_index
));
907 if (EVENT_SOURCE_IS_TIME(s
->type
)) {
908 struct clock_data
*d
;
910 d
= event_get_clock_data(s
->event
, s
->type
);
913 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
914 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
915 d
->needs_rearm
= true;
918 if (s
->type
== SOURCE_SIGNAL
&& !b
) {
919 struct signal_data
*d
;
921 d
= hashmap_get(s
->event
->signal_data
, &s
->priority
);
922 if (d
&& d
->current
== s
)
929 static sd_event_source
*source_new(sd_event
*e
, bool floating
, EventSourceType type
) {
934 s
= new0(sd_event_source
, 1);
940 s
->floating
= floating
;
942 s
->pending_index
= s
->prepare_index
= PRIOQ_IDX_NULL
;
947 LIST_PREPEND(sources
, e
->sources
, s
);
953 _public_
int sd_event_add_io(
955 sd_event_source
**ret
,
958 sd_event_io_handler_t callback
,
964 assert_return(e
, -EINVAL
);
965 assert_return(fd
>= 0, -EBADF
);
966 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
967 assert_return(callback
, -EINVAL
);
968 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
969 assert_return(!event_pid_changed(e
), -ECHILD
);
971 s
= source_new(e
, !ret
, SOURCE_IO
);
975 s
->wakeup
= WAKEUP_EVENT_SOURCE
;
977 s
->io
.events
= events
;
978 s
->io
.callback
= callback
;
979 s
->userdata
= userdata
;
980 s
->enabled
= SD_EVENT_ON
;
982 r
= source_io_register(s
, s
->enabled
, events
);
994 static void initialize_perturb(sd_event
*e
) {
995 sd_id128_t bootid
= {};
997 /* When we sleep for longer, we try to realign the wakeup to
998 the same time wihtin each minute/second/250ms, so that
999 events all across the system can be coalesced into a single
1000 CPU wakeup. However, let's take some system-specific
1001 randomness for this value, so that in a network of systems
1002 with synced clocks timer events are distributed a
1003 bit. Here, we calculate a perturbation usec offset from the
1006 if (_likely_(e
->perturb
!= USEC_INFINITY
))
1009 if (sd_id128_get_boot(&bootid
) >= 0)
1010 e
->perturb
= (bootid
.qwords
[0] ^ bootid
.qwords
[1]) % USEC_PER_MINUTE
;
1013 static int event_setup_timer_fd(
1015 struct clock_data
*d
,
1018 struct epoll_event ev
= {};
1024 if (_likely_(d
->fd
>= 0))
1027 fd
= timerfd_create(clock
, TFD_NONBLOCK
|TFD_CLOEXEC
);
1031 ev
.events
= EPOLLIN
;
1034 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
);
1044 static int time_exit_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
1047 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1050 _public_
int sd_event_add_time(
1052 sd_event_source
**ret
,
1056 sd_event_time_handler_t callback
,
1059 EventSourceType type
;
1061 struct clock_data
*d
;
1064 assert_return(e
, -EINVAL
);
1065 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1066 assert_return(accuracy
!= (uint64_t) -1, -EINVAL
);
1067 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1068 assert_return(!event_pid_changed(e
), -ECHILD
);
1071 callback
= time_exit_callback
;
1073 type
= clock_to_event_source_type(clock
);
1074 assert_return(type
>= 0, -EOPNOTSUPP
);
1076 d
= event_get_clock_data(e
, type
);
1080 d
->earliest
= prioq_new(earliest_time_prioq_compare
);
1086 d
->latest
= prioq_new(latest_time_prioq_compare
);
1092 r
= event_setup_timer_fd(e
, d
, clock
);
1097 s
= source_new(e
, !ret
, type
);
1101 s
->time
.next
= usec
;
1102 s
->time
.accuracy
= accuracy
== 0 ? DEFAULT_ACCURACY_USEC
: accuracy
;
1103 s
->time
.callback
= callback
;
1104 s
->time
.earliest_index
= s
->time
.latest_index
= PRIOQ_IDX_NULL
;
1105 s
->userdata
= userdata
;
1106 s
->enabled
= SD_EVENT_ONESHOT
;
1108 d
->needs_rearm
= true;
1110 r
= prioq_put(d
->earliest
, s
, &s
->time
.earliest_index
);
1114 r
= prioq_put(d
->latest
, s
, &s
->time
.latest_index
);
1128 static int signal_exit_callback(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1131 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1134 _public_
int sd_event_add_signal(
1136 sd_event_source
**ret
,
1138 sd_event_signal_handler_t callback
,
1142 struct signal_data
*d
;
1146 assert_return(e
, -EINVAL
);
1147 assert_return(sig
> 0, -EINVAL
);
1148 assert_return(sig
< _NSIG
, -EINVAL
);
1149 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1150 assert_return(!event_pid_changed(e
), -ECHILD
);
1153 callback
= signal_exit_callback
;
1155 r
= pthread_sigmask(SIG_SETMASK
, NULL
, &ss
);
1159 if (!sigismember(&ss
, sig
))
1162 if (!e
->signal_sources
) {
1163 e
->signal_sources
= new0(sd_event_source
*, _NSIG
);
1164 if (!e
->signal_sources
)
1166 } else if (e
->signal_sources
[sig
])
1169 s
= source_new(e
, !ret
, SOURCE_SIGNAL
);
1173 s
->signal
.sig
= sig
;
1174 s
->signal
.callback
= callback
;
1175 s
->userdata
= userdata
;
1176 s
->enabled
= SD_EVENT_ON
;
1178 e
->signal_sources
[sig
] = s
;
1180 r
= event_make_signal_data(e
, sig
, &d
);
1186 /* Use the signal name as description for the event source by default */
1187 (void) sd_event_source_set_description(s
, signal_to_string(sig
));
1195 _public_
int sd_event_add_child(
1197 sd_event_source
**ret
,
1200 sd_event_child_handler_t callback
,
1206 assert_return(e
, -EINVAL
);
1207 assert_return(pid
> 1, -EINVAL
);
1208 assert_return(!(options
& ~(WEXITED
|WSTOPPED
|WCONTINUED
)), -EINVAL
);
1209 assert_return(options
!= 0, -EINVAL
);
1210 assert_return(callback
, -EINVAL
);
1211 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1212 assert_return(!event_pid_changed(e
), -ECHILD
);
1214 r
= hashmap_ensure_allocated(&e
->child_sources
, NULL
);
1218 if (hashmap_contains(e
->child_sources
, INT_TO_PTR(pid
)))
1221 s
= source_new(e
, !ret
, SOURCE_CHILD
);
1226 s
->child
.options
= options
;
1227 s
->child
.callback
= callback
;
1228 s
->userdata
= userdata
;
1229 s
->enabled
= SD_EVENT_ONESHOT
;
1231 r
= hashmap_put(e
->child_sources
, INT_TO_PTR(pid
), s
);
1237 e
->n_enabled_child_sources
++;
1239 r
= event_make_signal_data(e
, SIGCHLD
, NULL
);
1241 e
->n_enabled_child_sources
--;
1246 e
->need_process_child
= true;
1254 _public_
int sd_event_add_defer(
1256 sd_event_source
**ret
,
1257 sd_event_handler_t callback
,
1263 assert_return(e
, -EINVAL
);
1264 assert_return(callback
, -EINVAL
);
1265 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1266 assert_return(!event_pid_changed(e
), -ECHILD
);
1268 s
= source_new(e
, !ret
, SOURCE_DEFER
);
1272 s
->defer
.callback
= callback
;
1273 s
->userdata
= userdata
;
1274 s
->enabled
= SD_EVENT_ONESHOT
;
1276 r
= source_set_pending(s
, true);
1288 _public_
int sd_event_add_post(
1290 sd_event_source
**ret
,
1291 sd_event_handler_t callback
,
1297 assert_return(e
, -EINVAL
);
1298 assert_return(callback
, -EINVAL
);
1299 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1300 assert_return(!event_pid_changed(e
), -ECHILD
);
1302 r
= set_ensure_allocated(&e
->post_sources
, NULL
);
1306 s
= source_new(e
, !ret
, SOURCE_POST
);
1310 s
->post
.callback
= callback
;
1311 s
->userdata
= userdata
;
1312 s
->enabled
= SD_EVENT_ON
;
1314 r
= set_put(e
->post_sources
, s
);
1326 _public_
int sd_event_add_exit(
1328 sd_event_source
**ret
,
1329 sd_event_handler_t callback
,
1335 assert_return(e
, -EINVAL
);
1336 assert_return(callback
, -EINVAL
);
1337 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1338 assert_return(!event_pid_changed(e
), -ECHILD
);
1341 e
->exit
= prioq_new(exit_prioq_compare
);
1346 s
= source_new(e
, !ret
, SOURCE_EXIT
);
1350 s
->exit
.callback
= callback
;
1351 s
->userdata
= userdata
;
1352 s
->exit
.prioq_index
= PRIOQ_IDX_NULL
;
1353 s
->enabled
= SD_EVENT_ONESHOT
;
1355 r
= prioq_put(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1367 _public_ sd_event_source
* sd_event_source_ref(sd_event_source
*s
) {
1368 assert_return(s
, NULL
);
1370 assert(s
->n_ref
>= 1);
1376 _public_ sd_event_source
* sd_event_source_unref(sd_event_source
*s
) {
1381 assert(s
->n_ref
>= 1);
1384 if (s
->n_ref
<= 0) {
1385 /* Here's a special hack: when we are called from a
1386 * dispatch handler we won't free the event source
1387 * immediately, but we will detach the fd from the
1388 * epoll. This way it is safe for the caller to unref
1389 * the event source and immediately close the fd, but
1390 * we still retain a valid event source object after
1393 if (s
->dispatching
) {
1394 if (s
->type
== SOURCE_IO
)
1395 source_io_unregister(s
);
1397 source_disconnect(s
);
1405 _public_
int sd_event_source_set_description(sd_event_source
*s
, const char *description
) {
1406 assert_return(s
, -EINVAL
);
1407 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1409 return free_and_strdup(&s
->description
, description
);
1412 _public_
int sd_event_source_get_description(sd_event_source
*s
, const char **description
) {
1413 assert_return(s
, -EINVAL
);
1414 assert_return(description
, -EINVAL
);
1415 assert_return(s
->description
, -ENXIO
);
1416 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1418 *description
= s
->description
;
1422 _public_ sd_event
*sd_event_source_get_event(sd_event_source
*s
) {
1423 assert_return(s
, NULL
);
1428 _public_
int sd_event_source_get_pending(sd_event_source
*s
) {
1429 assert_return(s
, -EINVAL
);
1430 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1431 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1432 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1437 _public_
int sd_event_source_get_io_fd(sd_event_source
*s
) {
1438 assert_return(s
, -EINVAL
);
1439 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1440 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1445 _public_
int sd_event_source_set_io_fd(sd_event_source
*s
, int fd
) {
1448 assert_return(s
, -EINVAL
);
1449 assert_return(fd
>= 0, -EBADF
);
1450 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1451 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1456 if (s
->enabled
== SD_EVENT_OFF
) {
1458 s
->io
.registered
= false;
1462 saved_fd
= s
->io
.fd
;
1463 assert(s
->io
.registered
);
1466 s
->io
.registered
= false;
1468 r
= source_io_register(s
, s
->enabled
, s
->io
.events
);
1470 s
->io
.fd
= saved_fd
;
1471 s
->io
.registered
= true;
1475 epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, saved_fd
, NULL
);
1481 _public_
int sd_event_source_get_io_events(sd_event_source
*s
, uint32_t* events
) {
1482 assert_return(s
, -EINVAL
);
1483 assert_return(events
, -EINVAL
);
1484 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1485 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1487 *events
= s
->io
.events
;
1491 _public_
int sd_event_source_set_io_events(sd_event_source
*s
, uint32_t events
) {
1494 assert_return(s
, -EINVAL
);
1495 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1496 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
1497 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1498 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1500 /* edge-triggered updates are never skipped, so we can reset edges */
1501 if (s
->io
.events
== events
&& !(events
& EPOLLET
))
1504 if (s
->enabled
!= SD_EVENT_OFF
) {
1505 r
= source_io_register(s
, s
->enabled
, events
);
1510 s
->io
.events
= events
;
1511 source_set_pending(s
, false);
1516 _public_
int sd_event_source_get_io_revents(sd_event_source
*s
, uint32_t* revents
) {
1517 assert_return(s
, -EINVAL
);
1518 assert_return(revents
, -EINVAL
);
1519 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1520 assert_return(s
->pending
, -ENODATA
);
1521 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1523 *revents
= s
->io
.revents
;
1527 _public_
int sd_event_source_get_signal(sd_event_source
*s
) {
1528 assert_return(s
, -EINVAL
);
1529 assert_return(s
->type
== SOURCE_SIGNAL
, -EDOM
);
1530 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1532 return s
->signal
.sig
;
1535 _public_
int sd_event_source_get_priority(sd_event_source
*s
, int64_t *priority
) {
1536 assert_return(s
, -EINVAL
);
1537 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1542 _public_
int sd_event_source_set_priority(sd_event_source
*s
, int64_t priority
) {
1545 assert_return(s
, -EINVAL
);
1546 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1547 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1549 if (s
->priority
== priority
)
1552 if (s
->type
== SOURCE_SIGNAL
&& s
->enabled
!= SD_EVENT_OFF
) {
1553 struct signal_data
*old
, *d
;
1555 /* Move us from the signalfd belonging to the old
1556 * priority to the signalfd of the new priority */
1558 assert_se(old
= hashmap_get(s
->event
->signal_data
, &s
->priority
));
1560 s
->priority
= priority
;
1562 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, &d
);
1564 s
->priority
= old
->priority
;
1568 event_unmask_signal_data(s
->event
, old
, s
->signal
.sig
);
1570 s
->priority
= priority
;
1573 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1576 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1578 if (s
->type
== SOURCE_EXIT
)
1579 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1584 _public_
int sd_event_source_get_enabled(sd_event_source
*s
, int *m
) {
1585 assert_return(s
, -EINVAL
);
1586 assert_return(m
, -EINVAL
);
1587 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1593 _public_
int sd_event_source_set_enabled(sd_event_source
*s
, int m
) {
1596 assert_return(s
, -EINVAL
);
1597 assert_return(m
== SD_EVENT_OFF
|| m
== SD_EVENT_ON
|| m
== SD_EVENT_ONESHOT
, -EINVAL
);
1598 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1600 /* If we are dead anyway, we are fine with turning off
1601 * sources, but everything else needs to fail. */
1602 if (s
->event
->state
== SD_EVENT_FINISHED
)
1603 return m
== SD_EVENT_OFF
? 0 : -ESTALE
;
1605 if (s
->enabled
== m
)
1608 if (m
== SD_EVENT_OFF
) {
1613 source_io_unregister(s
);
1617 case SOURCE_TIME_REALTIME
:
1618 case SOURCE_TIME_BOOTTIME
:
1619 case SOURCE_TIME_MONOTONIC
:
1620 case SOURCE_TIME_REALTIME_ALARM
:
1621 case SOURCE_TIME_BOOTTIME_ALARM
: {
1622 struct clock_data
*d
;
1625 d
= event_get_clock_data(s
->event
, s
->type
);
1628 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1629 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1630 d
->needs_rearm
= true;
1637 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1643 assert(s
->event
->n_enabled_child_sources
> 0);
1644 s
->event
->n_enabled_child_sources
--;
1646 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1651 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1660 assert_not_reached("Wut? I shouldn't exist.");
1667 r
= source_io_register(s
, m
, s
->io
.events
);
1674 case SOURCE_TIME_REALTIME
:
1675 case SOURCE_TIME_BOOTTIME
:
1676 case SOURCE_TIME_MONOTONIC
:
1677 case SOURCE_TIME_REALTIME_ALARM
:
1678 case SOURCE_TIME_BOOTTIME_ALARM
: {
1679 struct clock_data
*d
;
1682 d
= event_get_clock_data(s
->event
, s
->type
);
1685 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1686 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1687 d
->needs_rearm
= true;
1695 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, NULL
);
1697 s
->enabled
= SD_EVENT_OFF
;
1698 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1706 if (s
->enabled
== SD_EVENT_OFF
)
1707 s
->event
->n_enabled_child_sources
++;
1711 r
= event_make_signal_data(s
->event
, SIGCHLD
, NULL
);
1713 s
->enabled
= SD_EVENT_OFF
;
1714 s
->event
->n_enabled_child_sources
--;
1715 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1723 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1732 assert_not_reached("Wut? I shouldn't exist.");
1737 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1740 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1745 _public_
int sd_event_source_get_time(sd_event_source
*s
, uint64_t *usec
) {
1746 assert_return(s
, -EINVAL
);
1747 assert_return(usec
, -EINVAL
);
1748 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1749 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1751 *usec
= s
->time
.next
;
1755 _public_
int sd_event_source_set_time(sd_event_source
*s
, uint64_t usec
) {
1756 struct clock_data
*d
;
1758 assert_return(s
, -EINVAL
);
1759 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1760 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1761 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1762 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1764 s
->time
.next
= usec
;
1766 source_set_pending(s
, false);
1768 d
= event_get_clock_data(s
->event
, s
->type
);
1771 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1772 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1773 d
->needs_rearm
= true;
/* Public accessor: store the configured wake-up accuracy (slack) of a timer
 * event source in *usec. Only valid for the SOURCE_TIME_* types. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        /* NOTE(review): "return 0;" and closing brace elided in this excerpt. */
/* Public mutator: set the wake-up accuracy of a timer event source. A larger
 * accuracy lets the loop coalesce wake-ups (see sleep_between()); only the
 * "latest" queue depends on accuracy, so only that queue is reshuffled. */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* NOTE(review): the guard "if (usec == 0)" on the original line 1797
         * is elided in this excerpt — presumably 0 means "use the default";
         * verify against upstream. */
        usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;
        /* NOTE(review): "return 0;" and closing brace elided in this excerpt. */
/* Public accessor: report which clockid_t the timer event source runs on,
 * derived from its internal EventSourceType. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        /* NOTE(review): "return 0;" and closing brace elided in this excerpt. */
/* Public accessor: store the PID watched by a SOURCE_CHILD event source in
 * *pid. -EDOM for any other source type. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        /* NOTE(review): "return 0;" and closing brace elided in this excerpt. */
/* Public mutator: install (or, with NULL, remove) a per-source prepare
 * callback that runs before each poll. Sources with a prepare callback are
 * tracked in the event loop's "prepare" priority queue. Not allowed for
 * SOURCE_EXIT sources. NOTE(review): several control-flow lines (early
 * returns, if/else branches, error handling) are elided in this excerpt; the
 * token sequence below is preserved as visible. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* No change? Nothing to do (body of this branch elided). */
        if (s->prepare == callback)

        /* Swapping one non-NULL callback for another: no queue membership
         * change is needed, just store the new callback. */
        if (callback && s->prepare) {
                s->prepare = callback;

        /* Transition NULL -> callback: make sure the prepare queue exists,
         * then register the source in it. */
        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

        r = prioq_put(s->event->prepare, s, &s->prepare_index);
        /* Transition callback -> NULL: drop the source from the queue. */
        prioq_remove(s->event->prepare, s, &s->prepare_index);
/* Public accessor: return the opaque userdata pointer attached to the event
 * source (NULL on a NULL source). */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);
        /* NOTE(review): "return s->userdata;" and closing brace elided in
         * this excerpt. */
/* Public mutator: replace the event source's opaque userdata pointer.
 * NOTE(review): the original captures the previous value and returns it; that
 * line is elided in this excerpt. */
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        assert_return(s, NULL);

        s->userdata = userdata;
/* Pick a wake-up time inside the window [a, b], biased so that all event
 * loops on the system (seeded by a boot-ID-derived perturbation) wake up at
 * the same moments, maximizing coalescing. NOTE(review): the guards/returns
 * between the candidate computations are partially elided in this excerpt. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Candidate 1: the perturbed point inside b's minute. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (_unlikely_(c < USEC_PER_MINUTE))
        c -= USEC_PER_MINUTE;

        /* Candidate 2: the perturbed point inside b's 10s slot. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (_unlikely_(c < USEC_PER_SEC*10))
        c -= USEC_PER_SEC*10;

        /* Candidate 3: the perturbed point inside b's second. */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (_unlikely_(c < USEC_PER_SEC))

        /* Candidate 4: the perturbed point inside b's 250ms slot. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (_unlikely_(c < USEC_PER_MSEC*250))
        c -= USEC_PER_MSEC*250;
/* (Re-)program the timerfd of one clock from its earliest/latest priority
 * queues: disarm when nothing is enabled, otherwise pick a coalesced wake-up
 * time via sleep_between(). NOTE(review): the first parameter (`sd_event *e`)
 * and several guards/returns are elided in this excerpt. */
static int event_arm_timer(
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;

        /* Nothing changed since the last arm? Skip the syscall. */
        if (!d->needs_rearm)
        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF) {

                /* Already disarmed — nothing to do. */
                if (d->next == USEC_INFINITY)

                /* Disarm: its is zero-initialized, which stops the timer. */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

                d->next = USEC_INFINITY;

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);

        assert_se(d->fd >= 0);

        /* We don't want to disarm here, just to mean some time long ago:
         * tv_sec==0 && tv_nsec==0 would disable the timer, so use 1ns. */
        its.it_value.tv_sec = 0;
        its.it_value.tv_nsec = 1;

        timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Record fresh epoll revents on an I/O event source and mark it pending.
 * NOTE(review): the if/else selecting between OR-in and overwrite is elided
 * in this excerpt; both assignments appear below. */
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        s->io.revents |= revents;
        s->io.revents = revents;

        return source_set_pending(s, true);
/* Drain the 8-byte expiration counter from a timerfd so epoll stops
 * reporting it, and reset *next to "not armed". NOTE(review): error-path
 * returns around the read() are elided in this excerpt. */
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        /* Spurious wake-up or signal interruption: not an error. */
        if (errno == EAGAIN || errno == EINTR)

        /* A short read from a timerfd would indicate kernel breakage. */
        if (_unlikely_(ss != sizeof(x)))

        *next = USEC_INFINITY;
/* Walk the earliest-deadline queue of one clock and mark every elapsed,
 * enabled timer source pending, reshuffling the queues afterwards.
 * NOTE(review): the parameters (`sd_event *e, usec_t n`), the loop header and
 * the condition joining the enabled/elapsed checks are elided in this
 * excerpt. */
static int process_timer(
                struct clock_data *d) {

        s = prioq_peek(d->earliest);
        /* Stop once the head is disabled or not yet due (condition partially
         * elided). */
        s->enabled == SD_EVENT_OFF ||

        r = source_set_pending(s, true);

        /* Pending sources sort differently, so requeue and force re-arm. */
        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;
/* Poll every watched child with waitid(WNOHANG) and mark sources whose child
 * has a state change pending. NOTE(review): several guards, the `zombie =`
 * assignment head, and error-path returns are elided in this excerpt. */
static int process_child(sd_event *e) {

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD even of a
          previous invocation and we don't want to maintain a
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->enabled == SD_EVENT_OFF)

                zero(s->child.siginfo);
                /* WNOWAIT keeps exited children queued (zombie) so the
                 * dispatch callback can still inspect them. */
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);

                /* si_pid != 0 means waitid() actually found a state change. */
                if (s->child.siginfo.si_pid != 0) {
                        s->child.siginfo.si_code == CLD_EXITED ||
                        s->child.siginfo.si_code == CLD_KILLED ||
                        s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));

                        r = source_set_pending(s, true);
/* Dequeue one signal from a signalfd (per priority bucket) and mark the
 * matching signal event source pending. NOTE(review): the read loop header
 * and several guards/returns are elided in this excerpt. */
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;

        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one, and
           SIGCHLD might be enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */

                struct signalfd_siginfo si;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                /* Nothing queued (or interrupted): not an error. */
                if (errno == EAGAIN || errno == EINTR)

                /* Partial siginfo would indicate kernel breakage. */
                if (_unlikely_(n != sizeof(si)))

                assert(si.ssi_signo < _NSIG);

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                /* Hand the dequeued siginfo to the source for dispatch. */
                s->signal.siginfo = si;

                r = source_set_pending(s, true);
/* Invoke the user callback of one pending event source, after clearing its
 * pending flag, marking post sources, and honoring ONESHOT. Disables the
 * source if the callback fails. NOTE(review): the `switch (s->type)` header,
 * several `break`s/returns and error guards are elided in this excerpt. */
static int source_dispatch(sd_event_source *s) {

        assert(s->pending || s->type == SOURCE_EXIT);

        /* Defer/exit sources stay conceptually "always pending". */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);

        if (s->type != SOURCE_POST) {

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)

                        r = source_set_pending(z, true);

        /* ONESHOT sources are disabled before the callback runs, so the
         * callback may safely re-enable them. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);

        /* Guards against the callback freeing the source mid-dispatch. */
        s->dispatching = true;

        r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);

        r = s->signal.callback(s, &s->signal.siginfo, s->userdata);

        case SOURCE_CHILD: {

                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

        r = s->defer.callback(s, s->userdata);

        r = s->post.callback(s, s->userdata);

        r = s->exit.callback(s, s->userdata);

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");

        s->dispatching = false;

        /* Callback failed: log (with description when available) and turn
         * the source off rather than spinning on it. */
        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);

        sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Run the prepare callbacks queued for this iteration, at most once per
 * source per iteration, disabling any source whose callback fails.
 * NOTE(review): the enclosing loop and error returns are elided in this
 * excerpt. */
static int event_prepare(sd_event *e) {

        s = prioq_peek(e->prepare);
        /* Stop when the head was already prepared this iteration or is off. */
        if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)

        /* Stamp before reshuffling so the source sinks in the queue. */
        s->prepare_iteration = e->iteration;
        r = prioq_reshuffle(e->prepare, s, &s->prepare_index);

        s->dispatching = true;
        r = s->prepare(s, s->userdata);
        s->dispatching = false;

        /* Prepare callback failed: log and disable the source. */
        log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
        log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);

        sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Dispatch the highest-priority enabled exit source; when none is left the
 * loop transitions to SD_EVENT_FINISHED. NOTE(review): returns between state
 * changes are elided in this excerpt. */
static int dispatch_exit(sd_event *e) {

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;

        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_INITIAL;
/* Peek the highest-priority pending source, or NULL when there is none or the
 * head source is disabled. NOTE(review): the NULL returns are elided in this
 * excerpt. */
static sd_event_source* event_next_pending(sd_event *e) {

        p = prioq_peek(e->pending);

        if (p->enabled == SD_EVENT_OFF)
/* Program the watchdog timerfd to fire somewhere between 1/2 and 3/4 of the
 * watchdog period after the last ping, coalesced via sleep_between(). */
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};

        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        /* NOTE(review): error check and "return 0;" elided in this excerpt. */
/* Send a WATCHDOG=1 keep-alive to the service manager (rate-limited to once
 * per quarter period) and re-arm the watchdog timer. */
static int process_watchdog(sd_event *e) {

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
/* Public API: run prepare callbacks and arm all five clock timerfds; leaves
 * the loop in SD_EVENT_ARMED (or SD_EVENT_PENDING via a zero-timeout wait).
 * NOTE(review): error checks between the event_arm_timer() calls and several
 * returns are elided in this excerpt. */
_public_ int sd_event_prepare(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)

        r = event_prepare(e);

        /* Arm each clock's timerfd from its queues. */
        r = event_arm_timer(e, &e->realtime);

        r = event_arm_timer(e, &e->boottime);

        r = event_arm_timer(e, &e->monotonic);

        r = event_arm_timer(e, &e->realtime_alarm);

        r = event_arm_timer(e, &e->boottime_alarm);

        if (event_next_pending(e) || e->need_process_child)

        e->state = SD_EVENT_ARMED;

        /* Fall-through path: poll once with a zero timeout to collect
         * already-ready events. */
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        e->state = SD_EVENT_ARMED;
/* Public API: epoll_wait() for up to `timeout` usec, then process watchdog,
 * timers, signals, child state and I/O, ending in SD_EVENT_PENDING when
 * something is dispatchable, SD_EVENT_INITIAL otherwise. NOTE(review): the
 * switch header on the wakeup type, error checks and returns are elided in
 * this excerpt. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;

        /* One slot per source is always enough for a single epoll_wait(). */
        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* usec -> ms, rounding up so we never wake early; -1 waits forever. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));

        if (errno == EINTR) {
                e->state = SD_EVENT_PENDING;

        /* Cache "now" for all clocks once per iteration. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);

                /* All other payload pointers start with a WakeupType tag. */
                WakeupType *t = ev_queue[i].data.ptr;

                case WAKEUP_EVENT_SOURCE:
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                case WAKEUP_CLOCK_DATA: {
                        struct clock_data *d = ev_queue[i].data.ptr;
                        r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);

                case WAKEUP_SIGNAL_DATA:
                        r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);

                assert_not_reached("Invalid wake-up pointer");

        r = process_watchdog(e);

        /* Mark elapsed timers on each of the five clocks as pending. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);

        r = process_timer(e, e->timestamp_boottime, &e->boottime);

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);

        if (e->need_process_child) {
                r = process_child(e);

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

        e->state = SD_EVENT_INITIAL;
/* Public API: dispatch exactly one pending event source (or the exit chain if
 * an exit was requested), then return to SD_EVENT_INITIAL. NOTE(review):
 * the NULL-check on p and returns are elided in this excerpt. */
_public_ int sd_event_dispatch(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);

        e->state = SD_EVENT_RUNNING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;

        /* No pending source after all: just reset state. */
        e->state = SD_EVENT_INITIAL;
/* Public API: one full loop iteration — prepare, wait (if nothing was
 * immediately pending), dispatch. NOTE(review): the checks on each r are
 * elided in this excerpt. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        r = sd_event_prepare(e);

        /* There was nothing? Then wait... */
        r = sd_event_wait(e, timeout);

        /* There's something now, then let's dispatch it */
        r = sd_event_dispatch(e);
/* Public API: run iterations forever until the loop reaches
 * SD_EVENT_FINISHED (i.e. sd_event_exit() was processed). */
_public_ int sd_event_loop(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
/* Public accessor: return the loop's epoll fd so callers can embed it in an
 * outer poll loop. NOTE(review): the return statement is elided. */
_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
/* Public accessor: return the loop's current SD_EVENT_* state.
 * NOTE(review): the return statement is elided. */
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
/* Public accessor: report the exit code passed to sd_event_exit(); fails
 * when no exit has been requested yet (branch body elided). */
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)

        *code = e->exit_code;
/* Public API: request a graceful shutdown of the loop with the given code;
 * actual exit handling happens via dispatch_exit() on the next dispatch. */
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;
/* Public API: return the timestamp cached at the start of the current loop
 * iteration for the requested clock, falling back to a live now() before the
 * first iteration. NOTE(review): the switch header, fallback call and breaks
 * are elided in this excerpt. */
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!dual_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */

        case CLOCK_REALTIME:
        case CLOCK_REALTIME_ALARM:
                *usec = e->timestamp.realtime;

        case CLOCK_MONOTONIC:
                *usec = e->timestamp.monotonic;

        case CLOCK_BOOTTIME:
        case CLOCK_BOOTTIME_ALARM:
                *usec = e->timestamp_boottime;
/* Public API: return (creating on first use) the calling thread's default
 * event loop. The thread_local pointer is cleared via default_event_ptr when
 * the loop is freed. NOTE(review): several guards/returns are elided in this
 * excerpt. */
_public_ int sd_event_default(sd_event **ret) {

        static thread_local sd_event *default_event = NULL;

        /* With ret == NULL this just reports whether one exists. */
        return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);

        r = sd_event_new(&e);

        /* Let the loop clear this slot when it is destroyed. */
        e->default_event_ptr = &default_event;
/* Public accessor: report the TID the loop is bound to, if any.
 * NOTE(review): the body storing/returning the tid is elided in this
 * excerpt. */
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
/* Public API: enable/disable integration with the service manager's watchdog
 * (WATCHDOG_USEC): on enable, create a CLOCK_MONOTONIC timerfd, ping
 * immediately, arm it and register it with epoll; on disable (and on the
 * error path), deregister and close the fd. NOTE(review): if/else structure,
 * error returns and the final bookkeeping are elided in this excerpt. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state? */
        if (e->watchdog == !!b)

        struct epoll_event ev = {};

        /* false: do not unset WATCHDOG_USEC, other processes may use it. */
        r = sd_watchdog_enabled(false, &e->watchdog_period);

        /* Issue first ping immediately */
        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = now(CLOCK_MONOTONIC);

        e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
        if (e->watchdog_fd < 0)

        r = arm_watchdog(e);

        ev.events = EPOLLIN;
        /* Tagged with SOURCE_WATCHDOG so sd_event_wait() can recognize it. */
        ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);

        /* Disable path: deregister and close. */
        if (e->watchdog_fd >= 0) {
                epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                e->watchdog_fd = safe_close(e->watchdog_fd);

        /* Error path cleanup. */
        e->watchdog_fd = safe_close(e->watchdog_fd);
2848 _public_
int sd_event_get_watchdog(sd_event
*e
) {
2849 assert_return(e
, -EINVAL
);
2850 assert_return(!event_pid_changed(e
), -ECHILD
);