/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/timerfd.h>

#include "alloc-util.h"
#include "event-source.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
        return s->type == SOURCE_CHILD &&
               s->child.pidfd >= 0 &&
               s->child.options == WEXITED;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event {
        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        triple_timestamp timestamp;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        struct epoll_event *event_queue;
        size_t event_queue_allocated;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        r = epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        int r;

        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (s->child.registered)
                        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
                else
                        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
                if (r < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}
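
/* Note: one signalfd (and hence one signal_data object) exists per priority
 * level, so that a signal is dispatched at the priority of the event source
 * subscribed to it; the hashmap above is keyed by that 64-bit priority. */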

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is now empty we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}

DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
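
/* Illustrative usage sketch (not part of the library): watch an fd for
 * readability with a hypothetical handler. Passing NULL as the source pointer
 * creates a "floating" source owned by the event loop itself.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));  // drain some input
 *             return n < 0 ? -errno : 0;
 *     }
 *
 *     r = sd_event_add_io(e, NULL, fd, EPOLLIN, on_io, NULL);
 */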

static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
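
/* Worked example (illustrative): if the boot ID hashes to a perturbation of,
 * say, 17.3s, then wakeups that may be deferred anywhere within a minute are
 * aligned to second 17.3 of that minute on this machine, while machines with
 * different boot IDs pick different offsets: local timers coalesce, but a
 * fleet with synced clocks doesn't all wake at second 0. */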

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        int r;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        r = event_setup_timer_fd(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
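
/* Illustrative usage sketch (hypothetical handler): arm a one-shot timer on
 * CLOCK_MONOTONIC, due in 5s with 100ms accuracy. With a NULL callback, the
 * default time_exit_callback() above would terminate the loop instead.
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return 0;  // one-shot: the source is disabled after dispatch
 *     }
 *
 *     r = sd_event_add_time_relative(e, NULL, CLOCK_MONOTONIC,
 *                                    5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                                    on_time, NULL);
 */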

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->n_enabled_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */

                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }
        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */

                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }

                e->need_process_child = true;
        }

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
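
/* Illustrative sketch: as checked via signal_is_blocked() above, SIGCHLD must
 * be blocked in all threads before the first child source is added, e.g.:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGCHLD);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);  // or sigprocmask() if single-threaded
 */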

_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->n_enabled_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }

                e->need_process_child = true;
        }

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
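
/* Illustrative note: a defer source created here fires once on the next loop
 * iteration, since it is enabled as SD_EVENT_ONESHOT and marked pending right
 * away; if re-enabled with SD_EVENT_ON it stays pending and fires on every
 * iteration, which is rarely what you want. */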

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
         * events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
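
/* Worked example (illustrative): two sources watching the same inode, one with
 * IN_CREATE|IN_EXCL_UNLINK and one with IN_DELETE, combine to
 * IN_CREATE|IN_DELETE: the masks are ORed, but IN_EXCL_UNLINK is dropped
 * because only one of the two sources requested it. */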

static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}

_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
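
/* Illustrative usage sketch (hypothetical handler and path): watch a directory
 * for files being added.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/run/example", IN_CREATE|IN_MOVED_TO,
 *                              on_inotify, NULL);
 */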

static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
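
/* Note on the swap above: the source is registered on the new fd first, and
 * the old fd is removed from the epoll set only after that registration
 * succeeded; on failure the previous fd and registration state are restored,
 * so the source never ends up half-attached. */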

_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (m)
                *m = s->enabled;
        return s->enabled != SD_EVENT_OFF;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        if (EVENT_SOURCE_WATCH_PIDFD(s))
                                source_child_pidfd_unregister(s);
                        else
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                                /* yes, we have pidfd */

                                r = source_child_pidfd_register(s, s->enabled);
                                if (r < 0) {
                                        s->enabled = SD_EVENT_OFF;
                                        s->event->n_enabled_child_sources--;
                                        return r;
                                }
                        } else {
                                /* no pidfd, or something other to watch for than WEXITED */

                                r = event_make_signal_data(s->event, SIGCHLD, NULL);
                                if (r < 0) {
                                        s->enabled = SD_EVENT_OFF;
                                        s->event->n_enabled_child_sources--;
                                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                        return r;
                                }
                        }
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
        usec_t t;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);

        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_source_set_time(s, t + usec);
}

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd;
}

_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);

        /* If we already have seen indication the process exited, refuse sending a signal early. This way we
         * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
         * available. */
        if (s->child.exited)
                return -ESRCH;

        if (s->child.pidfd >= 0) {
                siginfo_t copy;

                /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
                 * structure here */
                if (si)
                        copy = *si;

                if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
                        /* Let's propagate the error only if the system call is not implemented or prohibited */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        return 0;
        }

        /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
         * them here. */
        if (flags != 0)
                return -EINVAL;

        if (si) {
                /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
                siginfo_t copy = *si;

                if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
                        return -errno;
        } else if (kill(s->child.pid, sig) < 0)
                return -errno;

        return 0;
}

_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd_owned;
}

_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        s->child.pidfd_owned = own;
        return 0;
}

_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        return s->child.process_owned;
}

_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        s->child.process_owned = own;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
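/* Editor's worked example for the stepping above: take perturb = 7s, a = 70s, b = 125s (values are
 * in µs in reality). The minute-granularity candidate is (125/60)*60 + 7 = 127s, which is past b,
 * so one minute is subtracted, giving 67s; that is below a and hence rejected. The 10s candidate
 * is (125/10)*10 + 7%10 = 127s, again past b; subtracting one 10s step yields 117s, which lies
 * within [a, b] and is returned. Every loop on the system derives the same perturbation from the
 * boot ID, so they all converge on the same wake-up spots. */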
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return r;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
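/* Editor's note: the OR-ing above means that if a source is woken for EPOLLIN, marked pending, and
 * then woken again for EPOLLOUT before it gets dispatched, its callback observes EPOLLIN|EPOLLOUT
 * rather than only the most recent event mask, so neither edge is lost. */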
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
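/* Editor's note: per timerfd_create(2), reading a timerfd yields an 8-byte expiration counter,
 * which is why exactly sizeof(uint64_t) bytes are expected above. The counter's value is
 * discarded; the read only serves to clear the fd's readability so epoll won't wake us again. */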
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                if (s->child.exited)
                        continue;

                if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (zombie)
                                s->child.exited = true;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
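/* Editor's sketch of the WNOWAIT behaviour relied upon above (see waitid(2); "pid" stands for some
 * watched child that already exited):
 *
 *     siginfo_t si = {};
 *     waitid(P_PID, pid, &si, WEXITED|WNOHANG|WNOWAIT);  // peek: fills in si, keeps the zombie
 *     waitid(P_PID, pid, &si, WEXITED|WNOHANG);          // reap: collects the process for good
 *
 * The peek is what happens here, so the dispatched callback can still inspect the zombie; the
 * reaping call is issued later from source_dispatch(). */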
static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (s->pending)
                return 0;

        if (s->enabled == SD_EVENT_OFF)
                return 0;

        if (!EVENT_SOURCE_WATCH_PIDFD(s))
                return 0;

        zero(s->child.siginfo);
        if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
                return -errno;

        if (s->child.siginfo.si_pid == 0)
                return 0;

        if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
                s->child.exited = true;

        return source_set_pending(s, true);
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one, and
           SIGCHLD might be enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return read_one;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}
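/* Editor's note: inotify(7) records are variable-sized, i.e. offsetof(struct inotify_event, name)
 * + ev.len bytes each, so once a record is consumed the remainder must be shifted to the buffer
 * head, as done above, for the next record to be parsed at a properly aligned offset. */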
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;
        }

        return event_inotify_data_read(e, d, EPOLLIN);
}
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type, here, so that we still know it after the event callback which might invalidate
         * the event source. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * inotify event queue buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
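/* Editor's worked example: with WatchdogSec=20s the manager expects a ping at least every 20s
 * (WATCHDOG_USEC=20000000). arm_watchdog() schedules the next wake-up into the window
 * [last + 10s, last + 15s] (period/2 to 3/4 of the period), while process_watchdog() above
 * additionally suppresses pings that would fire sooner than 5s (period/4) after the previous
 * one. */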
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
         * this check here once, since gettid() is typically not cached, and we thus want to minimize
         * syscalls. */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        size_t event_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        event_queue_max = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
                return -ENOMEM;

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
                else {
                        WakeupType *t = e->event_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                assert(s);

                                switch (s->type) {

                                case SOURCE_IO:
                                        r = process_io(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_CHILD:
                                        r = process_pidfd(e, s, e->event_queue[i].events);
                                        break;

                                default:
                                        assert_not_reached("Unexpected event source type");
                                }

                                break;
                        }

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                assert(d);

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
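/* Editor's sketch: the prepare/wait/dispatch split above exists precisely so the loop can be
 * embedded into a foreign poll loop. A hypothetical integration (wait_for_fd_readable() is an
 * illustrative external helper, not part of this library; error handling abbreviated):
 *
 *     for (;;) {
 *             r = sd_event_prepare(e);
 *             if (r < 0)
 *                     break;
 *             if (r == 0) {
 *                     wait_for_fd_readable(sd_event_get_fd(e));  // foreign poll on the epoll fd
 *                     r = sd_event_wait(e, 0);
 *                     if (r < 0)
 *                             break;
 *             }
 *             if (r > 0) {
 *                     r = sd_event_dispatch(e);
 *                     if (r <= 0)
 *                             break;  // error, or the loop reached SD_EVENT_FINISHED
 *             }
 *     }
 */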
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
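/* Editor's sketch of typical top-level use of the loop API above (public API only; error handling
 * abbreviated). Note that SIGTERM must be blocked before sd_event_add_signal(), see
 * sd_event_add_signal(3):
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sd_event *e = NULL;
 *     sigset_t ss;
 *
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &ss, NULL);
 *
 *     sd_event_default(&e);
 *     sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *     sd_event_loop(e);      // runs until sd_event_exit() flips the state to SD_EVENT_FINISHED
 *     e = sd_event_unref(e);
 */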
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
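/* Editor's note: the cached timestamp served above is what makes the common relative-timer idiom
 * cheap and consistent within a single iteration (my_timer_cb is an illustrative handler, not
 * part of this file):
 *
 *     uint64_t now_usec;
 *
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                           now_usec + 5 * USEC_PER_SEC, 0, my_timer_cb, NULL);
 */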
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
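/* Editor's note: the enablement above keys off the WATCHDOG_USEC/WATCHDOG_PID environment
 * variables as interpreted by sd_watchdog_enabled(3), i.e. whatever WatchdogSec= in the unit file
 * set up. A service would typically just do:
 *
 *     sd_event_default(&e);
 *     (void) sd_event_set_watchdog(e, true);  // silently a no-op if no watchdog was requested
 *
 * after which the loop pings the service manager automatically from process_watchdog(). */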
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}
_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}
_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s