/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};
static thread_local sd_event *default_event = NULL;
static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);
static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        return mfree(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, the object is dropped altogether. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is now all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
                         * still being watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int fd, r;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
int sd_event_add_exit(
1256 sd_event_source
**ret
,
1257 sd_event_handler_t callback
,
1260 _cleanup_(source_freep
) sd_event_source
*s
= NULL
;
1263 assert_return(e
, -EINVAL
);
1264 assert_return(e
= event_resolve(e
), -ENOPKG
);
1265 assert_return(callback
, -EINVAL
);
1266 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1267 assert_return(!event_pid_changed(e
), -ECHILD
);
1269 r
= prioq_ensure_allocated(&e
->exit
, exit_prioq_compare
);
1273 s
= source_new(e
, !ret
, SOURCE_EXIT
);
1277 s
->exit
.callback
= callback
;
1278 s
->userdata
= userdata
;
1279 s
->exit
.prioq_index
= PRIOQ_IDX_NULL
;
1280 s
->enabled
= SD_EVENT_ONESHOT
;
1282 r
= prioq_put(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}
static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}
static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}
DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}
static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}
static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * there's no point in passing it in yourself. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority
         * of the event source until then; for that we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}
_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(priority, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (m)
                *m = s->enabled;
        return s->enabled != SD_EVENT_OFF;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return r;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one and a
           SIGCHLD is enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from
                 * the read buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
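/* Note on the ONESHOT handling above: the source is switched off *before* its callback
 * runs, hence a handler that wants to keep firing can simply re-arm and re-enable
 * itself. Illustrative sketch, with a hypothetical handler name:
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             (void) sd_event_source_set_time(s, usec + 5 * USEC_PER_SEC);
 *             return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *     }
 */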
static int event_prepare(sd_event *e) {
        int r;

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
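/* Worked example for the window above, with a hypothetical period: if the service
 * manager passed WATCHDOG_USEC=20000000 (20s), watchdog_period is 20s and
 * arm_watchdog() schedules the next wakeup somewhere between watchdog_last + 10s
 * (period / 2) and watchdog_last + 15s (period * 3 / 4), which gives sleep_between()
 * room to coalesce this wakeup with other timers. process_watchdog() below then
 * rate-limits actual WATCHDOG=1 notifications to one per period / 4, i.e. 5s. */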
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away, as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
         * a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on them */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
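/* Illustrative sketch of driving the prepare/wait/dispatch triple manually, e.g. when
 * embedding sd-event into a foreign main loop instead of calling sd_event_run();
 * my_poll() is a hypothetical stand-in for the outer loop's poll step, and error
 * handling is elided:
 *
 *     while (sd_event_get_state(event) != SD_EVENT_FINISHED) {
 *             r = sd_event_prepare(event);              // > 0 if something is pending already
 *             if (r == 0) {
 *                     my_poll(sd_event_get_fd(event));  // block in the outer loop instead
 *                     r = sd_event_wait(event, 0);      // then collect events, without blocking
 *             }
 *             if (r > 0)
 *                     sd_event_dispatch(event);
 *     }
 */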
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %.*s", o, b);
}
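/* What gets logged above: delays[] is a histogram of iteration latencies, filled in by
 * sd_event_run() below, where bucket n counts iterations that started between 2^n and
 * 2^(n+1)-1 usec after the previous one (u64log2() of the difference). Profiling only
 * runs when profile_delays was enabled at allocation time (upstream gates this on the
 * SD_EVENT_PROFILE_DELAYS environment variable in sd_event_new()); the counters are
 * reset after each dump, and dumps are rate-limited to one per 5s. */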
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
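/* Minimal usage sketch (hypothetical handler, error handling elided): block SIGTERM,
 * register a handler that requests exit, and hand control to sd_event_loop(); its
 * return value is whatever code the handler passed to sd_event_exit():
 *
 *     static int on_term(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sigset_t mask;
 *     sd_event *event = NULL;
 *
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);   // signal sources require the signal to be blocked
 *     sd_event_default(&event);
 *     sd_event_add_signal(event, NULL, SIGTERM, on_term, NULL);
 *     r = sd_event_loop(event);              // returns 0 here, via sd_event_exit() above
 *     sd_event_unref(event);
 */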
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
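/* Once exit has been requested, dispatch_exit() above drains the exit priority queue:
 * sources added with sd_event_add_exit() run in priority order before the loop
 * finishes, which makes them a natural place for teardown work. Sketch with
 * hypothetical names:
 *
 *     static int on_exit_cleanup(sd_event_source *s, void *userdata) {
 *             my_flush_state(userdata);      // hypothetical teardown helper
 *             return 0;
 *     }
 *
 *     sd_event_add_exit(event, NULL, on_exit_cleanup, state);
 */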
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
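/* Illustrative use of the cached timestamp (hypothetical handler name, error handling
 * elided): relative timers are best based on the loop's coherent notion of "now" rather
 * than on a fresh clock_gettime() call, so that all sources of one iteration agree on
 * the base time:
 *
 *     uint64_t usec;
 *     sd_event_now(event, CLOCK_MONOTONIC, &usec);
 *     sd_event_add_time(event, NULL, CLOCK_MONOTONIC,
 *                       usec + 10 * USEC_PER_SEC,      // fire in ~10s
 *                       0,                             // 0 selects the default accuracy
 *                       on_time, NULL);
 */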
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        default_event = e;

        *ret = e;
        return 1;
}
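/* Note that the default event object is a per-thread singleton: two threads calling
 * sd_event_default() get two independent loops, and passing SD_EVENT_DEFAULT to the
 * other calls in this file resolves, via event_resolve(), to the calling thread's
 * instance. */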
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
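/* Usage sketch (error handling elided): a service with WatchdogSec= set in its unit
 * file only needs to opt in once; the loop then pings the service manager from
 * process_watchdog() as a side effect of its normal iterations:
 *
 *     sd_event *event = NULL;
 *     sd_event_default(&event);
 *     sd_event_set_watchdog(event, true);    // no-op if $WATCHDOG_USEC is not set
 *     sd_event_loop(event);
 */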
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}
_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
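/* Sketch of the intended pattern, with a hypothetical context struct: tie the lifetime
 * of a heap-allocated userdata object to the source, so it is released whenever the
 * source goes away, no matter through which code path:
 *
 *     struct my_ctx *ctx = new0(struct my_ctx, 1);
 *     sd_event_add_io(event, &source, fd, EPOLLIN, on_io, ctx);
 *     sd_event_source_set_destroy_callback(source, free);
 */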
_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}
_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);