1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
30 #include "time-util.h"
35 #define EPOLL_QUEUE_MAX 64
36 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
38 typedef enum EventSourceType
{
48 struct sd_event_source
{
53 sd_prepare_handler_t prepare
;
55 EventSourceType type
:4;
60 unsigned pending_index
;
61 unsigned prepare_index
;
62 unsigned pending_iteration
;
63 unsigned prepare_iteration
;
67 sd_io_handler_t callback
;
74 sd_time_handler_t callback
;
75 usec_t next
, accuracy
;
76 unsigned earliest_index
;
77 unsigned latest_index
;
80 sd_signal_handler_t callback
;
81 struct signalfd_siginfo siginfo
;
85 sd_child_handler_t callback
;
91 sd_defer_handler_t callback
;
94 sd_quit_handler_t callback
;
111 /* For both clocks we maintain two priority queues each, one
112 * ordered for the earliest times the events may be
113 * dispatched, and one ordered by the latest times they must
114 * have been dispatched. The range between the top entries in
115 * the two prioqs is the time window we can freely schedule
117 Prioq
*monotonic_earliest
;
118 Prioq
*monotonic_latest
;
119 Prioq
*realtime_earliest
;
120 Prioq
*realtime_latest
;
122 usec_t realtime_next
, monotonic_next
;
126 sd_event_source
**signal_sources
;
128 Hashmap
*child_sources
;
129 unsigned n_enabled_child_sources
;
136 dual_timestamp timestamp
;
139 bool quit_requested
:1;
140 bool need_process_child
:1;
143 static int pending_prioq_compare(const void *a
, const void *b
) {
144 const sd_event_source
*x
= a
, *y
= b
;
149 /* Enabled ones first */
150 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
152 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
155 /* Lower priority values first */
156 if (x
->priority
< y
->priority
)
158 if (x
->priority
> y
->priority
)
161 /* Older entries first */
162 if (x
->pending_iteration
< y
->pending_iteration
)
164 if (x
->pending_iteration
> y
->pending_iteration
)
167 /* Stability for the rest */
176 static int prepare_prioq_compare(const void *a
, const void *b
) {
177 const sd_event_source
*x
= a
, *y
= b
;
182 /* Move most recently prepared ones last, so that we can stop
183 * preparing as soon as we hit one that has already been
184 * prepared in the current iteration */
185 if (x
->prepare_iteration
< y
->prepare_iteration
)
187 if (x
->prepare_iteration
> y
->prepare_iteration
)
190 /* Enabled ones first */
191 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
193 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
196 /* Lower priority values first */
197 if (x
->priority
< y
->priority
)
199 if (x
->priority
> y
->priority
)
202 /* Stability for the rest */
211 static int earliest_time_prioq_compare(const void *a
, const void *b
) {
212 const sd_event_source
*x
= a
, *y
= b
;
214 assert(x
->type
== SOURCE_MONOTONIC
|| x
->type
== SOURCE_REALTIME
);
215 assert(y
->type
== SOURCE_MONOTONIC
|| y
->type
== SOURCE_REALTIME
);
217 /* Enabled ones first */
218 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
220 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
223 /* Move the pending ones to the end */
224 if (!x
->pending
&& y
->pending
)
226 if (x
->pending
&& !y
->pending
)
230 if (x
->time
.next
< y
->time
.next
)
232 if (x
->time
.next
> y
->time
.next
)
235 /* Stability for the rest */
244 static int latest_time_prioq_compare(const void *a
, const void *b
) {
245 const sd_event_source
*x
= a
, *y
= b
;
247 assert((x
->type
== SOURCE_MONOTONIC
&& y
->type
== SOURCE_MONOTONIC
) ||
248 (x
->type
== SOURCE_REALTIME
&& y
->type
== SOURCE_REALTIME
));
250 /* Enabled ones first */
251 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
253 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
256 /* Move the pending ones to the end */
257 if (!x
->pending
&& y
->pending
)
259 if (x
->pending
&& !y
->pending
)
263 if (x
->time
.next
+ x
->time
.accuracy
< y
->time
.next
+ y
->time
.accuracy
)
265 if (x
->time
.next
+ x
->time
.accuracy
> y
->time
.next
+ y
->time
.accuracy
)
268 /* Stability for the rest */
277 static int quit_prioq_compare(const void *a
, const void *b
) {
278 const sd_event_source
*x
= a
, *y
= b
;
280 assert(x
->type
== SOURCE_QUIT
);
281 assert(y
->type
== SOURCE_QUIT
);
283 /* Enabled ones first */
284 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
286 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
289 /* Lower priority values first */
290 if (x
->priority
< y
->priority
)
292 if (x
->priority
> y
->priority
)
295 /* Stability for the rest */
304 static void event_free(sd_event
*e
) {
307 if (e
->epoll_fd
>= 0)
308 close_nointr_nofail(e
->epoll_fd
);
310 if (e
->signal_fd
>= 0)
311 close_nointr_nofail(e
->signal_fd
);
313 if (e
->realtime_fd
>= 0)
314 close_nointr_nofail(e
->realtime_fd
);
316 if (e
->monotonic_fd
>= 0)
317 close_nointr_nofail(e
->monotonic_fd
);
319 prioq_free(e
->pending
);
320 prioq_free(e
->prepare
);
321 prioq_free(e
->monotonic_earliest
);
322 prioq_free(e
->monotonic_latest
);
323 prioq_free(e
->realtime_earliest
);
324 prioq_free(e
->realtime_latest
);
327 free(e
->signal_sources
);
329 hashmap_free(e
->child_sources
);
333 int sd_event_new(sd_event
** ret
) {
337 assert_return(ret
, -EINVAL
);
339 e
= new0(sd_event
, 1);
344 e
->signal_fd
= e
->realtime_fd
= e
->monotonic_fd
= e
->epoll_fd
= -1;
345 e
->realtime_next
= e
->monotonic_next
= (usec_t
) -1;
346 e
->original_pid
= getpid();
348 assert_se(sigemptyset(&e
->sigset
) == 0);
350 e
->pending
= prioq_new(pending_prioq_compare
);
356 e
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
357 if (e
->epoll_fd
< 0) {
370 sd_event
* sd_event_ref(sd_event
*e
) {
371 assert_return(e
, NULL
);
373 assert(e
->n_ref
>= 1);
379 sd_event
* sd_event_unref(sd_event
*e
) {
380 assert_return(e
, NULL
);
382 assert(e
->n_ref
>= 1);
391 static bool event_pid_changed(sd_event
*e
) {
394 /* We don't support people creating an event loop and keeping
395 * it around over a fork(). Let's complain. */
397 return e
->original_pid
!= getpid();
400 static int source_io_unregister(sd_event_source
*s
) {
404 assert(s
->type
== SOURCE_IO
);
406 if (!s
->io
.registered
)
409 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, s
->io
.fd
, NULL
);
413 s
->io
.registered
= false;
417 static int source_io_register(
422 struct epoll_event ev
= {};
426 assert(s
->type
== SOURCE_IO
);
427 assert(enabled
!= SD_EVENT_OFF
);
432 if (enabled
== SD_EVENT_ONESHOT
)
433 ev
.events
|= EPOLLONESHOT
;
435 if (s
->io
.registered
)
436 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_MOD
, s
->io
.fd
, &ev
);
438 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_ADD
, s
->io
.fd
, &ev
);
443 s
->io
.registered
= true;
448 static void source_free(sd_event_source
*s
) {
456 source_io_unregister(s
);
460 case SOURCE_MONOTONIC
:
461 prioq_remove(s
->event
->monotonic_earliest
, s
, &s
->time
.earliest_index
);
462 prioq_remove(s
->event
->monotonic_latest
, s
, &s
->time
.latest_index
);
465 case SOURCE_REALTIME
:
466 prioq_remove(s
->event
->realtime_earliest
, s
, &s
->time
.earliest_index
);
467 prioq_remove(s
->event
->realtime_latest
, s
, &s
->time
.latest_index
);
471 if (s
->signal
.sig
> 0) {
472 if (s
->signal
.sig
!= SIGCHLD
|| s
->event
->n_enabled_child_sources
== 0)
473 assert_se(sigdelset(&s
->event
->sigset
, s
->signal
.sig
) == 0);
475 if (s
->event
->signal_sources
)
476 s
->event
->signal_sources
[s
->signal
.sig
] = NULL
;
482 if (s
->child
.pid
> 0) {
483 if (s
->enabled
!= SD_EVENT_OFF
) {
484 assert(s
->event
->n_enabled_child_sources
> 0);
485 s
->event
->n_enabled_child_sources
--;
488 if (!s
->event
->signal_sources
|| !s
->event
->signal_sources
[SIGCHLD
])
489 assert_se(sigdelset(&s
->event
->sigset
, SIGCHLD
) == 0);
491 hashmap_remove(s
->event
->child_sources
, INT_TO_PTR(s
->child
.pid
));
497 prioq_remove(s
->event
->quit
, s
, &s
->quit
.prioq_index
);
502 prioq_remove(s
->event
->pending
, s
, &s
->pending_index
);
505 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
507 sd_event_unref(s
->event
);
513 static int source_set_pending(sd_event_source
*s
, bool b
) {
517 assert(s
->type
!= SOURCE_QUIT
);
525 s
->pending_iteration
= s
->event
->iteration
;
527 r
= prioq_put(s
->event
->pending
, s
, &s
->pending_index
);
533 assert_se(prioq_remove(s
->event
->pending
, s
, &s
->pending_index
));
538 static sd_event_source
*source_new(sd_event
*e
, EventSourceType type
) {
543 s
= new0(sd_event_source
, 1);
548 s
->event
= sd_event_ref(e
);
550 s
->pending_index
= s
->prepare_index
= PRIOQ_IDX_NULL
;
559 sd_io_handler_t callback
,
561 sd_event_source
**ret
) {
566 assert_return(e
, -EINVAL
);
567 assert_return(fd
>= 0, -EINVAL
);
568 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
)), -EINVAL
);
569 assert_return(callback
, -EINVAL
);
570 assert_return(ret
, -EINVAL
);
571 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
572 assert_return(!event_pid_changed(e
), -ECHILD
);
574 s
= source_new(e
, SOURCE_IO
);
579 s
->io
.events
= events
;
580 s
->io
.callback
= callback
;
581 s
->userdata
= userdata
;
582 s
->enabled
= SD_EVENT_ON
;
584 r
= source_io_register(s
, s
->enabled
, events
);
594 static int event_setup_timer_fd(
596 EventSourceType type
,
600 struct epoll_event ev
= {};
607 if (_likely_(*timer_fd
>= 0))
610 fd
= timerfd_create(id
, TFD_NONBLOCK
|TFD_CLOEXEC
);
615 ev
.data
.ptr
= INT_TO_PTR(type
);
617 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
);
619 close_nointr_nofail(fd
);
623 /* When we sleep for longer, we try to realign the wakeup to
624 the same time within each second, so that events all across
625 the system can be coalesced into a single CPU
626 wakeup. However, let's take some system-specific randomness
627 for this value, so that in a network of systems with synced
628 clocks timer events are distributed a bit. Here, we
629 calculate a perturbation usec offset from the boot ID. */
631 if (sd_id128_get_boot(&bootid
) >= 0)
632 e
->perturb
= (bootid
.qwords
[0] ^ bootid
.qwords
[1]) % USEC_PER_SEC
;
638 static int event_add_time_internal(
640 EventSourceType type
,
647 sd_time_handler_t callback
,
649 sd_event_source
**ret
) {
654 assert_return(e
, -EINVAL
);
655 assert_return(callback
, -EINVAL
);
656 assert_return(ret
, -EINVAL
);
657 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
658 assert_return(accuracy
!= (uint64_t) -1, -EINVAL
);
659 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
660 assert_return(!event_pid_changed(e
), -ECHILD
);
667 *earliest
= prioq_new(earliest_time_prioq_compare
);
673 *latest
= prioq_new(latest_time_prioq_compare
);
679 r
= event_setup_timer_fd(e
, type
, timer_fd
, id
);
684 s
= source_new(e
, type
);
689 s
->time
.accuracy
= accuracy
== 0 ? DEFAULT_ACCURACY_USEC
: accuracy
;
690 s
->time
.callback
= callback
;
691 s
->time
.earliest_index
= s
->time
.latest_index
= PRIOQ_IDX_NULL
;
692 s
->userdata
= userdata
;
693 s
->enabled
= SD_EVENT_ONESHOT
;
695 r
= prioq_put(*earliest
, s
, &s
->time
.earliest_index
);
699 r
= prioq_put(*latest
, s
, &s
->time
.latest_index
);
711 int sd_event_add_monotonic(sd_event
*e
, uint64_t usec
, uint64_t accuracy
, sd_time_handler_t callback
, void *userdata
, sd_event_source
**ret
) {
712 return event_add_time_internal(e
, SOURCE_MONOTONIC
, &e
->monotonic_fd
, CLOCK_MONOTONIC
, &e
->monotonic_earliest
, &e
->monotonic_latest
, usec
, accuracy
, callback
, userdata
, ret
);
715 int sd_event_add_realtime(sd_event
*e
, uint64_t usec
, uint64_t accuracy
, sd_time_handler_t callback
, void *userdata
, sd_event_source
**ret
) {
716 return event_add_time_internal(e
, SOURCE_REALTIME
, &e
->realtime_fd
, CLOCK_REALTIME
, &e
->realtime_earliest
, &e
->monotonic_latest
, usec
, accuracy
, callback
, userdata
, ret
);
719 static int event_update_signal_fd(sd_event
*e
) {
720 struct epoll_event ev
= {};
726 add_to_epoll
= e
->signal_fd
< 0;
728 r
= signalfd(e
->signal_fd
, &e
->sigset
, SFD_NONBLOCK
|SFD_CLOEXEC
);
738 ev
.data
.ptr
= INT_TO_PTR(SOURCE_SIGNAL
);
740 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, e
->signal_fd
, &ev
);
742 close_nointr_nofail(e
->signal_fd
);
751 int sd_event_add_signal(
754 sd_signal_handler_t callback
,
756 sd_event_source
**ret
) {
761 assert_return(e
, -EINVAL
);
762 assert_return(sig
> 0, -EINVAL
);
763 assert_return(sig
< _NSIG
, -EINVAL
);
764 assert_return(callback
, -EINVAL
);
765 assert_return(ret
, -EINVAL
);
766 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
767 assert_return(!event_pid_changed(e
), -ECHILD
);
769 if (!e
->signal_sources
) {
770 e
->signal_sources
= new0(sd_event_source
*, _NSIG
);
771 if (!e
->signal_sources
)
773 } else if (e
->signal_sources
[sig
])
776 s
= source_new(e
, SOURCE_SIGNAL
);
781 s
->signal
.callback
= callback
;
782 s
->userdata
= userdata
;
783 s
->enabled
= SD_EVENT_ON
;
785 e
->signal_sources
[sig
] = s
;
786 assert_se(sigaddset(&e
->sigset
, sig
) == 0);
788 if (sig
!= SIGCHLD
|| e
->n_enabled_child_sources
== 0) {
789 r
= event_update_signal_fd(e
);
800 int sd_event_add_child(
804 sd_child_handler_t callback
,
806 sd_event_source
**ret
) {
811 assert_return(e
, -EINVAL
);
812 assert_return(pid
> 1, -EINVAL
);
813 assert_return(!(options
& ~(WEXITED
|WSTOPPED
|WCONTINUED
)), -EINVAL
);
814 assert_return(options
!= 0, -EINVAL
);
815 assert_return(callback
, -EINVAL
);
816 assert_return(ret
, -EINVAL
);
817 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
818 assert_return(!event_pid_changed(e
), -ECHILD
);
820 r
= hashmap_ensure_allocated(&e
->child_sources
, trivial_hash_func
, trivial_compare_func
);
824 if (hashmap_contains(e
->child_sources
, INT_TO_PTR(pid
)))
827 s
= source_new(e
, SOURCE_CHILD
);
832 s
->child
.options
= options
;
833 s
->child
.callback
= callback
;
834 s
->userdata
= userdata
;
835 s
->enabled
= SD_EVENT_ONESHOT
;
837 r
= hashmap_put(e
->child_sources
, INT_TO_PTR(pid
), s
);
843 e
->n_enabled_child_sources
++;
845 assert_se(sigaddset(&e
->sigset
, SIGCHLD
) == 0);
847 if (!e
->signal_sources
|| !e
->signal_sources
[SIGCHLD
]) {
848 r
= event_update_signal_fd(e
);
855 e
->need_process_child
= true;
861 int sd_event_add_defer(
863 sd_defer_handler_t callback
,
865 sd_event_source
**ret
) {
870 assert_return(e
, -EINVAL
);
871 assert_return(callback
, -EINVAL
);
872 assert_return(ret
, -EINVAL
);
873 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
874 assert_return(!event_pid_changed(e
), -ECHILD
);
876 s
= source_new(e
, SOURCE_DEFER
);
880 s
->defer
.callback
= callback
;
881 s
->userdata
= userdata
;
882 s
->enabled
= SD_EVENT_ONESHOT
;
884 r
= source_set_pending(s
, true);
894 int sd_event_add_quit(
896 sd_quit_handler_t callback
,
898 sd_event_source
**ret
) {
903 assert_return(e
, -EINVAL
);
904 assert_return(callback
, -EINVAL
);
905 assert_return(ret
, -EINVAL
);
906 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
907 assert_return(!event_pid_changed(e
), -ECHILD
);
910 e
->quit
= prioq_new(quit_prioq_compare
);
915 s
= source_new(e
, SOURCE_QUIT
);
919 s
->quit
.callback
= callback
;
920 s
->userdata
= userdata
;
921 s
->quit
.prioq_index
= PRIOQ_IDX_NULL
;
922 s
->enabled
= SD_EVENT_ONESHOT
;
924 r
= prioq_put(s
->event
->quit
, s
, &s
->quit
.prioq_index
);
934 sd_event_source
* sd_event_source_ref(sd_event_source
*s
) {
935 assert_return(s
, NULL
);
937 assert(s
->n_ref
>= 1);
943 sd_event_source
* sd_event_source_unref(sd_event_source
*s
) {
944 assert_return(s
, NULL
);
946 assert(s
->n_ref
>= 1);
955 sd_event
*sd_event_get(sd_event_source
*s
) {
956 assert_return(s
, NULL
);
961 int sd_event_source_get_pending(sd_event_source
*s
) {
962 assert_return(s
, -EINVAL
);
963 assert_return(s
->type
!= SOURCE_QUIT
, -EDOM
);
964 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
965 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
970 int sd_event_source_get_io_fd(sd_event_source
*s
) {
971 assert_return(s
, -EINVAL
);
972 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
973 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
978 int sd_event_source_get_io_events(sd_event_source
*s
, uint32_t* events
) {
979 assert_return(s
, -EINVAL
);
980 assert_return(events
, -EINVAL
);
981 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
982 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
984 *events
= s
->io
.events
;
988 int sd_event_source_set_io_events(sd_event_source
*s
, uint32_t events
) {
991 assert_return(s
, -EINVAL
);
992 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
993 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
)), -EINVAL
);
994 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
995 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
997 if (s
->io
.events
== events
)
1000 if (s
->enabled
!= SD_EVENT_OFF
) {
1001 r
= source_io_register(s
, s
->io
.events
, events
);
1006 s
->io
.events
= events
;
1011 int sd_event_source_get_io_revents(sd_event_source
*s
, uint32_t* revents
) {
1012 assert_return(s
, -EINVAL
);
1013 assert_return(revents
, -EINVAL
);
1014 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1015 assert_return(s
->pending
, -ENODATA
);
1016 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1018 *revents
= s
->io
.revents
;
/* Returns the signal number a SOURCE_SIGNAL event source watches.
 * Errors: -EINVAL if s is NULL, -EDOM if s is not a signal source,
 * -ECHILD if the event loop was carried across a fork(). */
1022 int sd_event_source_get_signal(sd_event_source
*s
) {
1023 assert_return(s
, -EINVAL
);
1024 assert_return(s
->type
== SOURCE_SIGNAL
, -EDOM
);
1025 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
/* sig was stored by sd_event_add_signal() */
1027 return s
->signal
.sig
;
1030 int sd_event_source_get_priority(sd_event_source
*s
, int *priority
) {
1031 assert_return(s
, -EINVAL
);
1032 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1037 int sd_event_source_set_priority(sd_event_source
*s
, int priority
) {
1038 assert_return(s
, -EINVAL
);
1039 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1040 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1042 if (s
->priority
== priority
)
1045 s
->priority
= priority
;
1048 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1051 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1053 if (s
->type
== SOURCE_QUIT
)
1054 prioq_reshuffle(s
->event
->quit
, s
, &s
->quit
.prioq_index
);
1059 int sd_event_source_get_enabled(sd_event_source
*s
, int *m
) {
1060 assert_return(s
, -EINVAL
);
1061 assert_return(m
, -EINVAL
);
1062 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1068 int sd_event_source_set_enabled(sd_event_source
*s
, int m
) {
1071 assert_return(s
, -EINVAL
);
1072 assert_return(m
== SD_EVENT_OFF
|| m
== SD_EVENT_ON
|| m
== SD_EVENT_ONESHOT
, -EINVAL
);
1073 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1074 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1076 if (s
->enabled
== m
)
1079 if (m
== SD_EVENT_OFF
) {
1084 r
= source_io_unregister(s
);
1091 case SOURCE_MONOTONIC
:
1093 prioq_reshuffle(s
->event
->monotonic_earliest
, s
, &s
->time
.earliest_index
);
1094 prioq_reshuffle(s
->event
->monotonic_latest
, s
, &s
->time
.latest_index
);
1097 case SOURCE_REALTIME
:
1099 prioq_reshuffle(s
->event
->realtime_earliest
, s
, &s
->time
.earliest_index
);
1100 prioq_reshuffle(s
->event
->realtime_latest
, s
, &s
->time
.latest_index
);
1105 if (s
->signal
.sig
!= SIGCHLD
|| s
->event
->n_enabled_child_sources
== 0) {
1106 assert_se(sigdelset(&s
->event
->sigset
, s
->signal
.sig
) == 0);
1107 event_update_signal_fd(s
->event
);
1115 assert(s
->event
->n_enabled_child_sources
> 0);
1116 s
->event
->n_enabled_child_sources
--;
1118 if (!s
->event
->signal_sources
|| !s
->event
->signal_sources
[SIGCHLD
]) {
1119 assert_se(sigdelset(&s
->event
->sigset
, SIGCHLD
) == 0);
1120 event_update_signal_fd(s
->event
);
1127 prioq_reshuffle(s
->event
->quit
, s
, &s
->quit
.prioq_index
);
1139 r
= source_io_register(s
, m
, s
->io
.events
);
1146 case SOURCE_MONOTONIC
:
1148 prioq_reshuffle(s
->event
->monotonic_earliest
, s
, &s
->time
.earliest_index
);
1149 prioq_reshuffle(s
->event
->monotonic_latest
, s
, &s
->time
.latest_index
);
1152 case SOURCE_REALTIME
:
1154 prioq_reshuffle(s
->event
->realtime_earliest
, s
, &s
->time
.earliest_index
);
1155 prioq_reshuffle(s
->event
->realtime_latest
, s
, &s
->time
.latest_index
);
1161 if (s
->signal
.sig
!= SIGCHLD
|| s
->event
->n_enabled_child_sources
== 0) {
1162 assert_se(sigaddset(&s
->event
->sigset
, s
->signal
.sig
) == 0);
1163 event_update_signal_fd(s
->event
);
1170 if (s
->enabled
== SD_EVENT_OFF
) {
1171 s
->event
->n_enabled_child_sources
++;
1173 if (!s
->event
->signal_sources
|| !s
->event
->signal_sources
[SIGCHLD
]) {
1174 assert_se(sigaddset(&s
->event
->sigset
, SIGCHLD
) == 0);
1175 event_update_signal_fd(s
->event
);
1182 prioq_reshuffle(s
->event
->quit
, s
, &s
->quit
.prioq_index
);
1192 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1195 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1200 int sd_event_source_get_time(sd_event_source
*s
, uint64_t *usec
) {
1201 assert_return(s
, -EINVAL
);
1202 assert_return(usec
, -EINVAL
);
1203 assert_return(s
->type
== SOURCE_REALTIME
|| s
->type
== SOURCE_MONOTONIC
, -EDOM
);
1204 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1206 *usec
= s
->time
.next
;
1210 int sd_event_source_set_time(sd_event_source
*s
, uint64_t usec
) {
1211 assert_return(s
, -EINVAL
);
1212 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1213 assert_return(s
->type
== SOURCE_REALTIME
|| s
->type
== SOURCE_MONOTONIC
, -EDOM
);
1214 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1215 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1217 if (s
->time
.next
== usec
)
1220 s
->time
.next
= usec
;
1222 if (s
->type
== SOURCE_REALTIME
) {
1223 prioq_reshuffle(s
->event
->realtime_earliest
, s
, &s
->time
.earliest_index
);
1224 prioq_reshuffle(s
->event
->realtime_latest
, s
, &s
->time
.latest_index
);
1226 prioq_reshuffle(s
->event
->monotonic_earliest
, s
, &s
->time
.earliest_index
);
1227 prioq_reshuffle(s
->event
->monotonic_latest
, s
, &s
->time
.latest_index
);
1233 int sd_event_source_get_time_accuracy(sd_event_source
*s
, uint64_t *usec
) {
1234 assert_return(s
, -EINVAL
);
1235 assert_return(usec
, -EINVAL
);
1236 assert_return(s
->type
== SOURCE_REALTIME
|| s
->type
== SOURCE_MONOTONIC
, -EDOM
);
1237 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1239 *usec
= s
->time
.accuracy
;
1243 int sd_event_source_set_time_accuracy(sd_event_source
*s
, uint64_t usec
) {
1244 assert_return(s
, -EINVAL
);
1245 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1246 assert_return(s
->type
== SOURCE_REALTIME
|| s
->type
== SOURCE_MONOTONIC
, -EDOM
);
1247 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1248 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1251 usec
= DEFAULT_ACCURACY_USEC
;
1253 if (s
->time
.accuracy
== usec
)
1256 s
->time
.accuracy
= usec
;
1258 if (s
->type
== SOURCE_REALTIME
)
1259 prioq_reshuffle(s
->event
->realtime_latest
, s
, &s
->time
.latest_index
);
1261 prioq_reshuffle(s
->event
->monotonic_latest
, s
, &s
->time
.latest_index
);
1266 int sd_event_source_get_child_pid(sd_event_source
*s
, pid_t
*pid
) {
1267 assert_return(s
, -EINVAL
);
1268 assert_return(pid
, -EINVAL
);
1269 assert_return(s
->type
== SOURCE_CHILD
, -EDOM
);
1270 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1272 *pid
= s
->child
.pid
;
1276 int sd_event_source_set_prepare(sd_event_source
*s
, sd_prepare_handler_t callback
) {
1279 assert_return(s
, -EINVAL
);
1280 assert_return(s
->type
!= SOURCE_QUIT
, -EDOM
);
1281 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1282 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1284 if (s
->prepare
== callback
)
1287 if (callback
&& s
->prepare
) {
1288 s
->prepare
= callback
;
1292 r
= prioq_ensure_allocated(&s
->event
->prepare
, prepare_prioq_compare
);
1296 s
->prepare
= callback
;
1299 r
= prioq_put(s
->event
->prepare
, s
, &s
->prepare_index
);
1303 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
1308 void* sd_event_source_get_userdata(sd_event_source
*s
) {
1309 assert_return(s
, NULL
);
1314 static usec_t
sleep_between(sd_event
*e
, usec_t a
, usec_t b
) {
1326 Find a good time to wake up again between times a and b. We
1327 have two goals here:
1329 a) We want to wake up as seldom as possible, hence prefer
1330 later times over earlier times.
1332 b) But if we have to wake up, then let's make sure to
1333 dispatch as much as possible on the entire system.
1335 We implement this by waking up everywhere at the same time
1336 within any given second if we can, synchronised via the
1337 perturbation value determined from the boot ID. If we can't,
1338 then we try to find the same spot in every 250ms
1339 step. Otherwise, we pick the last possible time to wake up.
1342 c
= (b
/ USEC_PER_SEC
) * USEC_PER_SEC
+ e
->perturb
;
1344 if (_unlikely_(c
< USEC_PER_SEC
))
1353 c
= (b
/ (USEC_PER_MSEC
*250)) * (USEC_PER_MSEC
*250) + (e
->perturb
% (USEC_PER_MSEC
*250));
1355 if (_unlikely_(c
< USEC_PER_MSEC
*250))
1358 c
-= USEC_PER_MSEC
*250;
1367 static int event_arm_timer(
1374 struct itimerspec its
= {};
1375 sd_event_source
*a
, *b
;
1382 a
= prioq_peek(earliest
);
1383 if (!a
|| a
->enabled
== SD_EVENT_OFF
)
1386 b
= prioq_peek(latest
);
1387 assert_se(b
&& b
->enabled
!= SD_EVENT_OFF
);
1389 t
= sleep_between(e
, a
->time
.next
, b
->time
.next
+ b
->time
.accuracy
);
1393 assert_se(timer_fd
>= 0);
1396 /* We don't want to disarm here, just mean some time looooong ago. */
1397 its
.it_value
.tv_sec
= 0;
1398 its
.it_value
.tv_nsec
= 1;
1400 timespec_store(&its
.it_value
, t
);
1402 r
= timerfd_settime(timer_fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
1410 static int process_io(sd_event
*e
, sd_event_source
*s
, uint32_t events
) {
1413 assert(s
->type
== SOURCE_IO
);
1415 s
->io
.revents
= events
;
1418 If this is a oneshot event source, then we added it to the
1419 epoll with EPOLLONESHOT, hence we know it's not registered
1420 anymore. We can save a syscall here...
1423 if (s
->enabled
== SD_EVENT_ONESHOT
)
1424 s
->io
.registered
= false;
1426 return source_set_pending(s
, true);
1429 static int flush_timer(sd_event
*e
, int fd
, uint32_t events
) {
1435 assert_return(events
== EPOLLIN
, -EIO
);
1437 ss
= read(fd
, &x
, sizeof(x
));
1439 if (errno
== EAGAIN
|| errno
== EINTR
)
1445 if (ss
!= sizeof(x
))
1451 static int process_timer(
1463 s
= prioq_peek(earliest
);
1466 s
->enabled
== SD_EVENT_OFF
||
1470 r
= source_set_pending(s
, true);
1474 prioq_reshuffle(earliest
, s
, &s
->time
.earliest_index
);
1475 prioq_reshuffle(latest
, s
, &s
->time
.latest_index
);
1481 static int process_child(sd_event
*e
) {
1488 e
->need_process_child
= false;
1491 So, this is ugly. We iteratively invoke waitid() with P_PID
1492 + WNOHANG for each PID we wait for, instead of using
1493 P_ALL. This is because we only want to get child
1494 information of very specific child processes, and not all
1495 of them. We might not have processed the SIGCHLD event of a
1496 previous invocation and we don't want to maintain a
1497 unbounded *per-child* event queue, hence we really don't
1498 want anything flushed out of the kernel's queue that we
1499 don't care about. Since this is O(n) this means that if you
1500 have a lot of processes you probably want to handle SIGCHLD
1504 HASHMAP_FOREACH(s
, e
->child_sources
, i
) {
1505 assert(s
->type
== SOURCE_CHILD
);
1510 if (s
->enabled
== SD_EVENT_OFF
)
1513 zero(s
->child
.siginfo
);
1514 r
= waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|s
->child
.options
);
1518 if (s
->child
.siginfo
.si_pid
!= 0) {
1519 r
= source_set_pending(s
, true);
1528 static int process_signal(sd_event
*e
, uint32_t events
) {
1529 struct signalfd_siginfo si
;
1530 bool read_one
= false;
1535 assert_return(events
== EPOLLIN
, -EIO
);
1540 ss
= read(e
->signal_fd
, &si
, sizeof(si
));
1542 if (errno
== EAGAIN
|| errno
== EINTR
)
1548 if (ss
!= sizeof(si
))
1553 if (si
.ssi_signo
== SIGCHLD
) {
1554 r
= process_child(e
);
1557 if (r
> 0 || !e
->signal_sources
[si
.ssi_signo
])
1560 s
= e
->signal_sources
[si
.ssi_signo
];
1565 s
->signal
.siginfo
= si
;
1566 r
= source_set_pending(s
, true);
1575 static int source_dispatch(sd_event_source
*s
) {
1579 assert(s
->pending
|| s
->type
== SOURCE_QUIT
);
1581 if (s
->type
!= SOURCE_DEFER
&& s
->type
!= SOURCE_QUIT
) {
1582 r
= source_set_pending(s
, false);
1587 if (s
->enabled
== SD_EVENT_ONESHOT
) {
1588 r
= sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
1596 r
= s
->io
.callback(s
, s
->io
.fd
, s
->io
.revents
, s
->userdata
);
1599 case SOURCE_MONOTONIC
:
1600 r
= s
->time
.callback(s
, s
->time
.next
, s
->userdata
);
1603 case SOURCE_REALTIME
:
1604 r
= s
->time
.callback(s
, s
->time
.next
, s
->userdata
);
1608 r
= s
->signal
.callback(s
, &s
->signal
.siginfo
, s
->userdata
);
1612 r
= s
->child
.callback(s
, &s
->child
.siginfo
, s
->userdata
);
1616 r
= s
->defer
.callback(s
, s
->userdata
);
1620 r
= s
->quit
.callback(s
, s
->userdata
);
1627 static int event_prepare(sd_event
*e
) {
1635 s
= prioq_peek(e
->prepare
);
1636 if (!s
|| s
->prepare_iteration
== e
->iteration
|| s
->enabled
== SD_EVENT_OFF
)
1639 s
->prepare_iteration
= e
->iteration
;
1640 r
= prioq_reshuffle(e
->prepare
, s
, &s
->prepare_index
);
1645 r
= s
->prepare(s
, s
->userdata
);
1654 static int dispatch_quit(sd_event
*e
) {
1660 p
= prioq_peek(e
->quit
);
1661 if (!p
|| p
->enabled
== SD_EVENT_OFF
) {
1662 e
->state
= SD_EVENT_FINISHED
;
1668 e
->state
= SD_EVENT_QUITTING
;
1670 r
= source_dispatch(p
);
1672 e
->state
= SD_EVENT_PASSIVE
;
1678 static sd_event_source
* event_next_pending(sd_event
*e
) {
1683 p
= prioq_peek(e
->pending
);
1687 if (p
->enabled
== SD_EVENT_OFF
)
1693 int sd_event_run(sd_event
*e
, uint64_t timeout
) {
1694 struct epoll_event ev_queue
[EPOLL_QUEUE_MAX
];
1698 assert_return(e
, -EINVAL
);
1699 assert_return(!event_pid_changed(e
), -ECHILD
);
1700 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1701 assert_return(e
->state
== SD_EVENT_PASSIVE
, -EBUSY
);
1703 if (e
->quit_requested
)
1704 return dispatch_quit(e
);
1708 e
->state
= SD_EVENT_RUNNING
;
1710 r
= event_prepare(e
);
1714 if (event_next_pending(e
) || e
->need_process_child
)
1718 r
= event_arm_timer(e
, e
->monotonic_fd
, e
->monotonic_earliest
, e
->monotonic_latest
, &e
->monotonic_next
);
1722 r
= event_arm_timer(e
, e
->realtime_fd
, e
->realtime_earliest
, e
->realtime_latest
, &e
->realtime_next
);
1727 m
= epoll_wait(e
->epoll_fd
, ev_queue
, EPOLL_QUEUE_MAX
,
1728 timeout
== (uint64_t) -1 ? -1 : (int) ((timeout
+ USEC_PER_MSEC
- 1) / USEC_PER_MSEC
));
1734 dual_timestamp_get(&e
->timestamp
);
1736 for (i
= 0; i
< m
; i
++) {
1738 if (ev_queue
[i
].data
.ptr
== INT_TO_PTR(SOURCE_MONOTONIC
))
1739 r
= flush_timer(e
, e
->monotonic_fd
, ev_queue
[i
].events
);
1740 else if (ev_queue
[i
].data
.ptr
== INT_TO_PTR(SOURCE_REALTIME
))
1741 r
= flush_timer(e
, e
->realtime_fd
, ev_queue
[i
].events
);
1742 else if (ev_queue
[i
].data
.ptr
== INT_TO_PTR(SOURCE_SIGNAL
))
1743 r
= process_signal(e
, ev_queue
[i
].events
);
1745 r
= process_io(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
1751 r
= process_timer(e
, e
->timestamp
.monotonic
, e
->monotonic_earliest
, e
->monotonic_latest
);
1755 r
= process_timer(e
, e
->timestamp
.realtime
, e
->realtime_earliest
, e
->realtime_latest
);
1759 if (e
->need_process_child
) {
1760 r
= process_child(e
);
1765 p
= event_next_pending(e
);
1771 r
= source_dispatch(p
);
1774 e
->state
= SD_EVENT_PASSIVE
;
1780 int sd_event_loop(sd_event
*e
) {
1783 assert_return(e
, -EINVAL
);
1784 assert_return(!event_pid_changed(e
), -ECHILD
);
1785 assert_return(e
->state
== SD_EVENT_PASSIVE
, -EBUSY
);
1789 while (e
->state
!= SD_EVENT_FINISHED
) {
1790 r
= sd_event_run(e
, (uint64_t) -1);
1802 int sd_event_get_state(sd_event
*e
) {
1803 assert_return(e
, -EINVAL
);
1804 assert_return(!event_pid_changed(e
), -ECHILD
);
/* Returns whether a quit was requested via sd_event_request_quit()
 * (non-zero) or not (zero). Errors: -EINVAL if e is NULL, -ECHILD if
 * the loop was carried across a fork(). */
1809 int sd_event_get_quit(sd_event
*e
) {
1810 assert_return(e
, -EINVAL
);
1811 assert_return(!event_pid_changed(e
), -ECHILD
);
/* quit_requested is a 1-bit flag set by sd_event_request_quit() */
1813 return e
->quit_requested
;
1816 int sd_event_request_quit(sd_event
*e
) {
1817 assert_return(e
, -EINVAL
);
1818 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1819 assert_return(!event_pid_changed(e
), -ECHILD
);
1821 e
->quit_requested
= true;
1825 int sd_event_get_now_realtime(sd_event
*e
, uint64_t *usec
) {
1826 assert_return(e
, -EINVAL
);
1827 assert_return(usec
, -EINVAL
);
1828 assert_return(dual_timestamp_is_set(&e
->timestamp
), -ENODATA
);
1829 assert_return(!event_pid_changed(e
), -ECHILD
);
1831 *usec
= e
->timestamp
.realtime
;
1835 int sd_event_get_now_monotonic(sd_event
*e
, uint64_t *usec
) {
1836 assert_return(e
, -EINVAL
);
1837 assert_return(usec
, -EINVAL
);
1838 assert_return(dual_timestamp_is_set(&e
->timestamp
), -ENODATA
);
1839 assert_return(!event_pid_changed(e
), -ECHILD
);
1841 *usec
= e
->timestamp
.monotonic
;