/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/timerfd.h>

#include "alloc-util.h"
#include "event-source.h"
#include "memory-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;
static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}
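
/* For illustration (hypothetical values, not part of the original source): given two pending,
 * enabled sources X (priority 0, pending_iteration 7) and Y (priority -10, pending_iteration 3),
 * prioq_peek() returns Y first, since -10 < 0; the older pending_iteration only breaks ties
 * between equal priorities. */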
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}
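
/* A sketch of how the two time queues interact, with made-up numbers: a source with next = 100ms
 * and accuracy = 50ms sorts at 100ms in the "earliest" queue and at 150ms (next + accuracy, see
 * time_event_source_latest() above) in the "latest" queue. The timerfd is then armed somewhere
 * inside the [100ms, 150ms] window by sleep_between() further down. */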
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        return mfree(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
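
/* Minimal usage sketch (hypothetical caller, not part of this file):
 *
 *         sd_event *e = NULL;
 *         int r = sd_event_new(&e);
 *         if (r < 0)
 *                 return r;
 *         ...
 *         e = sd_event_unref(e);
 *
 * Most callers use sd_event_default() instead, which shares one loop per thread via
 * default_event_ptr above. */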
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, removes the object. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is now empty we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }
                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
                         * continued to being watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
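
/* Worked example (made-up value): if bootid.qwords[0] ^ bootid.qwords[1] were 123456789012,
 * then e->perturb = 123456789012 % USEC_PER_MINUTE = 36789012, i.e. this machine prefers to
 * wake up ~36.8s past each minute boundary. Every machine derives a different, but stable,
 * offset from its boot ID. */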
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
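
/* Usage sketch (hypothetical caller, not part of this file): arm a one-shot timer 5s from now
 * with 100ms accuracy on the monotonic clock:
 *
 *         uint64_t now_usec;
 *         sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *         r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                               now_usec + 5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                               my_timer_handler, NULL);
 *
 * Passing accuracy == 0 selects DEFAULT_ACCURACY_USEC (250ms), giving the loop latitude to
 * coalesce the wakeup with others. */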
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
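
/* Usage sketch (hypothetical): after fork()ing a child with pid, a caller that has SIGCHLD
 * blocked could register
 *
 *         r = sd_event_add_child(e, &source, pid, WEXITED, on_child_exit, NULL);
 *
 * and on_child_exit() would be invoked with the filled-in siginfo once the child exits; the PID
 * is only reaped after the callback ran (see source_dispatch() below). */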
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
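
/* Usage sketch (hypothetical): since the source is marked pending right away,
 *
 *         r = sd_event_add_defer(e, NULL, do_once, NULL);
 *
 * runs do_once() on the next loop iteration and then disables it again (SD_EVENT_ONESHOT);
 * passing NULL for ret creates the source "floating", i.e. owned by the event loop. */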
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}
static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}
static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}
static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
         * events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
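
/* Worked example with two hypothetical sources on the same inode: one watching
 * IN_CLOSE_WRITE|IN_EXCL_UNLINK, one watching IN_MOVED_TO. The combined kernel mask becomes
 * IN_CLOSE_WRITE|IN_MOVED_TO without IN_EXCL_UNLINK, because the second source did not request
 * IN_EXCL_UNLINK and that flag is ANDed rather than ORed. */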
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
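
/* Usage sketch (hypothetical): to watch a configuration directory for new or changed files, a
 * caller might do
 *
 *         r = sd_event_add_inotify(e, &source, "/etc/myapp.d",
 *                                  IN_CLOSE_WRITE|IN_MOVED_TO|IN_ONLYDIR,
 *                                  on_config_change, NULL);
 *
 * Watches on the same inode are coalesced into a single kernel watch descriptor, which is why
 * IN_MASK_ADD is rejected above. */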
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}
_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (m)
                *m = s->enabled;
        return s->enabled != SD_EVENT_OFF;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
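
/* Worked example (made-up numbers): with a = 95s, b = 130s and perturb = 36789012 (~36.8s), the
 * minute-granularity candidate (130s / 60s) * 60s + 36.8s = 156.8s overshoots b, so one minute
 * is subtracted, giving 96.8s. That lies within [a, b] and is returned, so all machines with the
 * same boot ID coalesce on that instant. If the window were narrower, the 10s, 1s and 250ms
 * granularities would be tried in turn, with b as the final fallback. */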
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
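
/* Example of why the ORing matters (hypothetical sequence): with EPOLLONESHOT, one wakeup might
 * report EPOLLIN and a later rearm might report EPOLLOUT before the source was dispatched;
 * ORing yields EPOLLIN|EPOLLOUT in s->io.revents so neither edge is lost. */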
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one,
           SIGCHLD might be enqueued later and we wouldn't know, but
           we might have higher-priority children we care about,
           hence we need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;
        }

        return 0;
}
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

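/* Usage sketch (illustrative, not part of the original file): the buffering
 * above is what backs sd_event_add_inotify(), e.g. watching a made-up config
 * directory:
 *
 *     static int on_change(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_info("%s changed", strempty(ev->name));
 *             return 0;
 *     }
 *
 *     assert_se(sd_event_add_inotify(e, NULL, "/etc/myconf.d",
 *                                    IN_CLOSE_WRITE|IN_MOVED_TO, on_change, NULL) >= 0);
 *
 * "/etc/myconf.d" and on_change() are invented for the example. Sources
 * watching the same inode share one watch descriptor, hence the
 * inode_data/inotify_data indirection used when dispatching here. */
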
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type, here, so that we still know it after the event callback which might invalidate
         * the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}

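/* Note (illustrative, not from the original): because SD_EVENT_ONESHOT sources
 * are switched off *before* their callback runs (see above), a handler may
 * re-enable itself to run again, e.g. a made-up countdown defer source:
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             unsigned *countdown = userdata;
 *
 *             if (--*countdown > 0)
 *                     assert_se(sd_event_source_set_enabled(s, SD_EVENT_ONESHOT) >= 0);
 *             return 0;
 *     }
 *
 * sd_event_add_defer() enables sources as SD_EVENT_ONESHOT by default, so this
 * runs once per loop iteration until the countdown hits zero. */
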
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}

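/* Worked example (illustrative): with WatchdogSec=20 in the unit file,
 * sd_watchdog_enabled() yields watchdog_period = 20 * USEC_PER_SEC. After a
 * ping at time t, the timerfd above is armed somewhere in [t+10s, t+15s],
 * i.e. between period/2 and period*3/4, with sleep_between() picking a spot
 * that can coalesce with other wakeups; process_watchdog() below additionally
 * refuses to ping before t+5s (period/4). The loop therefore pings roughly
 * once per half-period, leaving ample margin before the 20s limit. */
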
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}

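/* Illustrative note (not from the original): a successful sd_event_run() pass
 * is the three phases above glued together, roughly:
 *
 *     r = sd_event_prepare(e);                  // INITIAL -> ARMED, >0 if already pending
 *     if (r == 0)
 *             r = sd_event_wait(e, timeout);    // ARMED -> PENDING (>0) or back to INITIAL
 *     if (r > 0)
 *             r = sd_event_dispatch(e);         // PENDING -> RUNNING -> INITIAL
 *
 * which is why callers may substitute the decomposed calls when they need to
 * interleave their own work between the phases. */
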
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}

_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

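/* Usage sketch (illustrative, not part of the original file): since the fd
 * returned above is the epoll fd, the loop can be embedded into a foreign
 * main loop, along these lines:
 *
 *     int fd = sd_event_get_fd(e);
 *
 *     while (sd_event_get_state(e) != SD_EVENT_FINISHED) {
 *             struct pollfd p = { .fd = fd, .events = POLLIN };
 *
 *             if (poll(&p, 1, -1) < 0 && errno != EINTR)
 *                     break;
 *
 *             assert_se(sd_event_run(e, 0) >= 0);    // one non-blocking turn
 *     }
 *
 * Timer and signal sources keep working, since their timerfds and signalfds
 * are registered in that same epoll set. */
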
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

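/* Usage sketch (illustrative): exiting is cooperative. A handler might do:
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 * on_sigterm() is made up for the example. The loop then runs all enabled
 * SOURCE_EXIT sources via dispatch_exit() before sd_event_loop() returns the
 * code passed here. */
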
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

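/* Usage sketch (illustrative): inside callbacks, prefer the cached timestamp
 * over now() so that all sources of one iteration agree on "now", e.g. to
 * re-arm a pre-existing CLOCK_MONOTONIC timer source 5s ahead:
 *
 *     uint64_t t;
 *
 *     assert_se(sd_event_now(e, CLOCK_MONOTONIC, &t) >= 0);
 *     assert_se(sd_event_source_set_time(timer_source, t + 5 * USEC_PER_SEC) >= 0);
 *
 * "timer_source" is hypothetical here. */
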
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

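/* Usage sketch (illustrative): a daemon started with WatchdogSec= merely calls
 *
 *     assert_se(sd_event_set_watchdog(e, true) >= 0);
 *
 * once after creating the loop. If $WATCHDOG_USEC is unset for the process,
 * sd_watchdog_enabled() reports 0 above and this is a harmless no-op, so
 * enabling unconditionally is safe. */
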
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}

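/* Usage sketch (illustrative): the destroy callback ties userdata lifetime to
 * the source, e.g. handing a heap-allocated context over to the loop:
 *
 *     struct context *c;                         // hypothetical type
 *
 *     assert_se(c = new0(struct context, 1));
 *     assert_se(sd_event_add_defer(e, &s, on_defer, c) >= 0);
 *     assert_se(sd_event_source_set_destroy_callback(s, free) >= 0);
 *
 * free() then runs exactly once, whenever the source is eventually freed. */
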
_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}

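/* Usage sketch (illustrative): a floating source pins its event loop instead
 * of being pinned by the caller, enabling "fire and forget" registration:
 *
 *     sd_event_source *s;
 *
 *     assert_se(sd_event_add_defer(e, &s, on_defer, NULL) >= 0);
 *     assert_se(sd_event_source_set_floating(s, true) >= 0);
 *     sd_event_source_unref(s);                  // the loop now keeps it alive
 *
 * Passing NULL instead of &s to the sd_event_add_*() calls sets this up
 * implicitly. on_defer() is made up for the example. */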