/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/timerfd.h>

#include "alloc-util.h"
#include "event-source.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};
static thread_local sd_event *default_event = NULL;
static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);
static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}
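
/* Editor's note, a worked example of the ordering the comparator above
 * produces (sketch, not part of the original source). Given three pending
 * sources A (enabled, priority 0, iteration 5), B (disabled) and C (enabled,
 * priority 0, iteration 3), the prioq pops C first (same priority as A, but
 * an older iteration), then A; the disabled B always sorts last:
 *
 *     Prioq *q = prioq_new(pending_prioq_compare);
 *     prioq_put(q, C, &C->pending_index);
 *     prioq_put(q, A, &A->pending_index);
 *     prioq_put(q, B, &B->pending_index);
 *     assert_se(prioq_peek(q) == C);
 */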
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        return mfree(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
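
/* Usage sketch (editor's addition, relying on the public sd-event API only):
 * allocate a loop, attach sources, then run it. `on_input` is a hypothetical
 * caller-supplied handler, not something defined in this file:
 *
 *     _cleanup_(sd_event_unrefp) sd_event *e = NULL;
 *     assert_se(sd_event_new(&e) >= 0);
 *     assert_se(sd_event_add_io(e, NULL, fd, EPOLLIN, on_input, NULL) >= 0);
 *     assert_se(sd_event_loop(e) >= 0);
 */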
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}
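
/* Editor's note (sketch): sd_event_source_disable_unref() is handy in cleanup
 * paths where the handler must never fire again even if other references keep
 * the source object alive:
 *
 *     src = sd_event_source_disable_unref(src);   // disables, drops our ref, returns NULL
 */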
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
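
/* Editor's note (sketch, not original text): SD_EVENT_ONESHOT maps to
 * EPOLLONESHOT above, so after one wakeup the kernel disarms the fd until the
 * next EPOLL_CTL_MOD. Re-enabling a oneshot source therefore goes through this
 * function again, and the EPOLL_CTL_MOD branch re-arms the registration:
 *
 *     assert_se(sd_event_source_set_enabled(s, SD_EVENT_ONESHOT) >= 0);
 */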
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * means we can get rid of it entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }
                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}

DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
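
/* Usage sketch (editor's addition): watch a socket for readability. The
 * handler name `on_socket_io` is hypothetical; when `ret` is non-NULL the
 * returned source carries a reference the caller must eventually drop:
 *
 *     static int on_socket_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     ... read from fd ...
 *             }
 *             return 0;
 *     }
 *
 *     sd_event_source *src = NULL;
 *     assert_se(sd_event_add_io(e, &src, fd, EPOLLIN, on_socket_io, NULL) >= 0);
 */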
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
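
/* Editor's worked example for the perturbation logic above (sketch, values
 * invented): if the boot ID hashes to a perturbation of 17.3s, all
 * minute-granularity wakeups on this machine align to second 17.3 of each
 * minute, while a machine with a different boot ID lands elsewhere:
 *
 *     e->perturb = 17300000;                                     // 17.3s, derived from boot ID
 *     c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;  // perturbed minute boundary
 */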
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        r = event_setup_timer_fd(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
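
/* Usage sketch (editor's addition): sd_event_add_time() takes an absolute
 * time, so a relative timeout is expressed against the current clock value;
 * `on_timer` is a hypothetical handler. Note the source starts out as
 * SD_EVENT_ONESHOT, i.e. it fires once unless re-enabled:
 *
 *     usec_t now_usec;
 *     assert_se(sd_event_now(e, CLOCK_MONOTONIC, &now_usec) >= 0);
 *     assert_se(sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                 now_usec + 5 * USEC_PER_SEC, 0,
 *                                 on_timer, NULL) >= 0);
 */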
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
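
/* Usage sketch (editor's addition): the pthread_sigmask() check above means
 * the caller must block the signal before adding a source for it, e.g.:
 *
 *     sigset_t mask;
 *     assert_se(sigemptyset(&mask) >= 0);
 *     assert_se(sigaddset(&mask, SIGTERM) >= 0);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) >= 0);
 *
 *     // NULL callback: SIGTERM simply exits the loop (signal_exit_callback)
 *     assert_se(sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL) >= 0);
 */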
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
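
/* Usage sketch (editor's addition): since child handling is implemented via
 * SIGCHLD above, SIGCHLD must be blocked in the caller before this is used;
 * `on_child_exit` is a hypothetical handler receiving the siginfo_t:
 *
 *     sigset_t mask;
 *     assert_se(sigemptyset(&mask) >= 0);
 *     assert_se(sigaddset(&mask, SIGCHLD) >= 0);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) >= 0);
 *
 *     assert_se(sd_event_add_child(e, NULL, pid, WEXITED, on_child_exit, NULL) >= 0);
 */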
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
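
/* Editor's note (sketch): a defer source is marked pending immediately above,
 * so it dispatches on the next loop iteration, and, being SD_EVENT_ONESHOT,
 * exactly once unless re-enabled; `do_deferred_work` is a hypothetical handler:
 *
 *     assert_se(sd_event_add_defer(e, NULL, do_deferred_work, NULL) >= 0);
 */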
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}
static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}
static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}
static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}
static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
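
/* Editor's worked example for the mask combination above (sketch): two
 * sources on the same inode, one with IN_MODIFY|IN_EXCL_UNLINK and one with
 * just IN_DELETE_SELF, yield IN_MODIFY|IN_DELETE_SELF without IN_EXCL_UNLINK:
 * the event bits are ORed, but IN_EXCL_UNLINK only survives if *every* source
 * requested it:
 *
 *     combined    = IN_MODIFY | IN_DELETE_SELF;  // OR of the event bits
 *     excl_unlink = false;                       // second source lacks IN_EXCL_UNLINK
 */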
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
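
/* Usage sketch (editor's addition): watch a directory for new files;
 * `on_inotify` and "/run/mydir" are hypothetical, the handler receives the
 * raw inotify_event from the kernel:
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             if (ev->len > 0)
 *                     log_debug("saw %s", ev->name);
 *             return 0;
 *     }
 *
 *     assert_se(sd_event_add_inotify(e, NULL, "/run/mydir",
 *                                    IN_CREATE|IN_MOVED_TO, on_inotify, NULL) >= 0);
 */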
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}
_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority,
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back on failure */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (m)
                *m = s->enabled;

        return s->enabled != SD_EVENT_OFF;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
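
/* Editor's worked example for sleep_between() (sketch, values invented): with
 * a = 90s, b = 150s and a perturbation of 17s, the minute step computes
 * (150/60)*60 + 17 = 137s, which already lies within [a, b], so the timer is
 * armed for second 137 and every other loop on a machine with the same
 * perturbation picks the very same deadline, coalescing the CPU wakeups. */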
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);

        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one, and
           SIGCHLD might be enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}
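/* Usage sketch (illustrative): the buffering above backs the public
 * sd_event_add_inotify() call; a watch on a directory could look like this.
 * The path, handler and rescan_directory() helper are hypothetical:
 *
 *     static int on_change(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             if (ev->mask & IN_Q_OVERFLOW)
 *                     // Events were lost; rescan instead of trusting ev->name.
 *                     return rescan_directory(userdata);
 *             return 0;
 *     }
 *
 *     sd_event_add_inotify(e, NULL, "/etc/myservice.d", IN_CREATE|IN_DELETE|IN_MOVED_TO, on_change, ctx);
 */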
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type, here, so that we still know it after the event callback which might invalidate
         * the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
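/* Note on the SD_EVENT_ONESHOT handling above: the source is disabled *before*
 * its callback runs, so a handler that wants to fire again must re-enable
 * itself. Illustrative sketch (the handler name is made up):
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             // ... do one unit of work ...
 *             return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);  // re-arm
 *     }
 */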
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
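/* Worked example of the window above: with watchdog_period = 20s and the last
 * ping at time T, sleep_between() picks a wake-up somewhere in [T+10s, T+15s],
 * i.e. between half and three quarters of the period. That keeps a comfortable
 * margin before the service manager's deadline at T+20s while avoiding
 * needlessly frequent pings. */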
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * somewhat safe compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
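/* Usage sketch (illustrative): sd_event_prepare(), sd_event_wait() and
 * sd_event_dispatch() are split out so the loop can be embedded into a foreign
 * poll loop; driven manually, one iteration looks roughly like what
 * sd_event_run() below does:
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0)                      // nothing pending yet
 *             r = sd_event_wait(e, timeout);
 *     if (r > 0)                       // something is pending now
 *             r = sd_event_dispatch(e);
 */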
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        unsigned i;
        size_t l;

        p = b;
        l = sizeof(b);

        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }

        log_debug("Event loop iterations: %s", b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
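/* Usage sketch (illustrative): a minimal consumer of this loop; the handler
 * name and exit code are made up for the example, and error handling is
 * omitted for brevity:
 *
 *     #include <systemd/sd-event.h>
 *
 *     static int on_ready(sd_event_source *s, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 42);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *
 *             sd_event_default(&e);
 *             sd_event_add_defer(e, NULL, on_ready, NULL);
 *             return sd_event_loop(e);   // returns 42, the code set via sd_event_exit()
 *     }
 */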
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
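/* Usage sketch (illustrative): the usual pattern for a relative timeout is to
 * base it on this cached timestamp, so every source armed in the same iteration
 * shares one notion of "now". Handler name and offset are made up:
 *
 *     uint64_t usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &usec);   // r > 0 means it fell back to now()
 *     sd_event_add_time(e, &s, CLOCK_MONOTONIC, usec + 1000000, 0, on_timer, NULL);  // 1s from now
 */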
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
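/* Usage sketch (illustrative): for a service started with WatchdogSec= set, a
 * single call is enough; the loop then pings the service manager automatically
 * as implemented above:
 *
 *     sd_event_set_watchdog(e, true);   // returns 0 without enabling anything
 *                                       // if $WATCHDOG_USEC is not set
 */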
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}
_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}
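/* Usage sketch (illustrative): since sd_event_destroy_t matches free()'s
 * signature, heap-allocated userdata can be tied to the source's lifetime.
 * The context struct and handler are made up:
 *
 *     struct ctx *c = malloc(sizeof(struct ctx));
 *     sd_event_add_defer(e, &s, on_ready, c);
 *     sd_event_source_set_destroy_callback(s, free);  // c is freed when the source dies
 */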
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}
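/* Usage sketch (illustrative): a floating source lets the caller drop its own
 * reference and bind the source's lifetime to the event loop instead. Handler
 * name is made up:
 *
 *     sd_event_source *s;
 *     sd_event_add_defer(e, &s, on_ready, NULL);
 *     sd_event_source_set_floating(s, true);
 *     sd_event_source_unref(s);    // the loop now holds the only reference
 */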