/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/timerfd.h>

#include "alloc-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        SOURCE_INOTIFY,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch them */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        WAKEUP_INOTIFY_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        signed int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        sd_event_destroy_t destroy_callback;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                        bool owned:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
                struct {
                        sd_event_inotify_handler_t callback;
                        uint32_t mask;
                        struct inode_data *inode_data;
                        LIST_FIELDS(sd_event_source, by_inode_data);
                } inotify;
        };
};
struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in. */
        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */
        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};
/* A structure listing all event sources currently watching a specific inode */
struct inode_data {
        /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
        ino_t ino;
        dev_t dev;

        /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
         * rearrange the priority still until then, as we need the original inode to change the priority as we need to
         * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the
         * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
         * the sd-event object, so that it is efficient to close everything, before entering the next event loop
         * iteration. */
        int fd;

        /* The inotify "watch descriptor" */
        int wd;

        /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
         * most recently been set on the watch descriptor. */
        uint32_t combined_mask;

        /* All event sources subscribed to this inode */
        LIST_HEAD(sd_event_source, event_sources);

        /* The inotify object we watch this inode with */
        struct inotify_data *inotify_data;

        /* A linked list of all inode data objects with fds to close (see above) */
        LIST_FIELDS(struct inode_data, to_close);
};
/* A structure encapsulating an inotify fd */
struct inotify_data {
        WakeupType wakeup;

        /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
         * a time. */
        int64_t priority;
        int fd;

        Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
        Hashmap *wd;     /* The inode_data structures keyed by the watch descriptor for each */

        /* The buffer we read inotify events into */
        union inotify_event_buffer buffer;
        size_t buffer_filled; /* fill level of the buffer */

        /* How many event sources are currently marked pending for this inotify. We won't read new events off the
         * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
         * the events locally if they can't be coalesced). */
        unsigned n_pending;

        /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
         * to make it efficient to figure out what inotify objects to process data on next. */
        LIST_FIELDS(struct inotify_data, buffered);
};
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;
static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
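
/* Illustrative usage (not part of the original file): every public entry point runs its
 * event pointer through event_resolve(), so the SD_EVENT_DEFAULT sentinel can stand in
 * for the thread's default loop, provided sd_event_default() was called on this thread
 * first:
 *
 *     r = sd_event_add_defer(SD_EVENT_DEFAULT, &s, my_handler, NULL);
 */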
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}
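
/* Sketch of how this comparator is consumed (illustrative, not in the original): the
 * pending prioq keeps an enabled source with the numerically lowest priority that has
 * been pending longest at its head, so the dispatch path can simply do:
 *
 *     sd_event_source *next = prioq_peek(e->pending);
 */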
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (time_event_source_latest(x) < time_event_source_latest(y))
                return -1;
        if (time_event_source_latest(x) > time_event_source_latest(y))
                return 1;

        return 0;
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        return mfree(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
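
/* Typical caller-side usage (illustrative, using the usual libsystemd cleanup idiom):
 *
 *     _cleanup_(sd_event_unrefp) sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 */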
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        d->fd = safe_close(d->fd);
        hashmap_remove(e->signal_data, &d->priority);

        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * means we can get rid of it entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                safe_close(d->fd);
                free(d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode keeps
                         * being watched. That's because inotify doesn't really have an API for that: we can only
                         * change watch masks with access to the original inode either by fd or by path. But paths
                         * aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}

DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
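
/* Example call (illustrative; "on_input" and "ctx" are hypothetical names): watch a
 * socket for readability:
 *
 *     r = sd_event_add_io(e, &source, sockfd, EPOLLIN, on_input, ctx);
 *
 * Passing ret == NULL makes the source "floating", i.e. owned by the loop itself. */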
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
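
/* Worked example (hypothetical boot ID): with bootid.qwords[0] == 0x1111111111111111
 * and bootid.qwords[1] == 0x2222222222222222, the XOR is 0x3333333333333333 and the
 * perturb value is that modulo USEC_PER_MINUTE, i.e. a boot-stable offset in [0, 60s)
 * that all timers of this system share. */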
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
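
/* Example call (illustrative): fire once roughly 5s from now on CLOCK_MONOTONIC;
 * passing 0 as accuracy selects DEFAULT_ACCURACY_USEC:
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r >= 0)
 *             r = sd_event_add_time(e, &s, CLOCK_MONOTONIC,
 *                                   now_usec + 5 * USEC_PER_SEC, 0, on_time, NULL);
 */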
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
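
/* Example call (illustrative): as checked above, the signal must already be blocked in
 * the calling thread before it can be subscribed to:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *     r = sd_event_add_signal(e, &s, SIGTERM, on_sigterm, NULL);
 */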
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
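
/* Example call (illustrative): watch a forked-off child for exit. SIGCHLD must be
 * blocked before the child can possibly die, or the notification may be lost:
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(run_worker());    ("run_worker" is a hypothetical worker function)
 *     r = sd_event_add_child(e, &s, pid, WEXITED, on_child_exit, NULL);
 */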
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}
static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static int inode_data_compare(const void *a, const void *b) {
        const struct inode_data *x = a, *y = b;

        assert(x);
        assert(y);

        if (x->dev < y->dev)
                return -1;
        if (x->dev > y->dev)
                return 1;

        if (x->ino < y->ino)
                return -1;
        if (x->ino > y->ino)
                return 1;

        return 0;
}
static void inode_data_hash_func(const void *p, struct siphash *state) {
        const struct inode_data *d = p;

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

const struct hash_ops inode_data_hash_ops = {
        .hash = inode_data_hash_func,
        .compare = inode_data_compare,
};
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}
static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}
static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
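
/* Worked example (illustrative): two sources on the same inode, one with mask
 * IN_CLOSE_WRITE|IN_EXCL_UNLINK and one with IN_MOVED_TO, yield a combined kernel mask
 * of IN_CLOSE_WRITE|IN_MOVED_TO: the bits OR together, but IN_EXCL_UNLINK is dropped
 * because not all sources requested it. */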
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
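
/* Example call (illustrative): watch a directory for files showing up:
 *
 *     r = sd_event_add_inotify(e, &s, "/run/myapp", IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 *
 * Watches on the same inode are coalesced into a single kernel watch per priority,
 * which is why IN_MASK_ADD is refused above. */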
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}
_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority,
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
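
/* Worked example (illustrative numbers): for a = 70s, b = 95s and perturb = 13s the
 * minute-granularity step computes c = (95s / 60s) * 60s + 13s = 73s; 73s < b and
 * 73s >= a, so 73s is returned. Every loop on this machine therefore wakes at second
 * 13 of the minute, letting the wakeups coalesce. */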
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
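/* Usage sketch (illustrative only): a child event source. Because process_child() above queries with
 * WNOWAIT, the callback still sees the process as a zombie; it is reaped for good in source_dispatch()
 * right after the callback returns. SIGCHLD must be blocked before the child is forked; example_child and
 * on_child are hypothetical names. */
#if 0
#include <signal.h>
#include <sys/wait.h>
#include <systemd/sd-event.h>

static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* si->si_status carries the exit code when si->si_code == CLD_EXITED. */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int example_child(sd_event *e, pid_t pid) {
        sigset_t ss;

        sigemptyset(&ss);
        sigaddset(&ss, SIGCHLD);
        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
                return -1;

        return sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
}
#endif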
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one and
           SIGCHLD is enqueued later we wouldn't know, but we might
           have higher priority children we care about, hence we need
           to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return false;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
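/* Usage sketch (illustrative only): a signal event source. The signal must be blocked in all threads before
 * sd_event_add_signal() is called, since the library dequeues it via a signalfd; process_signal() above
 * then dequeues at most one signal per priority per iteration. example_signal/on_sigterm are hypothetical
 * names. */
#if 0
#include <signal.h>
#include <systemd/sd-event.h>

static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int example_signal(sd_event *e) {
        sigset_t ss;

        sigemptyset(&ss);
        sigaddset(&ss, SIGTERM);
        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
                return -1;

        return sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
}
#endif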
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                /* Nothing pending? Then let's drop this event and pick up the next one */
                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}
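/* Usage sketch (illustrative only): an inotify event source. The buffering implemented above hands the
 * callback one struct inotify_event at a time, and an IN_Q_OVERFLOW wakes every source on the same inotify
 * object. The path, mask and names (example_inotify, on_inotify) are arbitrary assumptions. */
#if 0
#include <sys/inotify.h>
#include <systemd/sd-event.h>

static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        /* ev->mask says what happened; ev->name (when ev->len > 0) names the affected directory entry. */
        return 0;
}

int example_inotify(sd_event *e) {
        return sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE|IN_DELETE, on_inotify, NULL);
}
#endif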
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type, here, so that we still know it after the event callback which might invalidate
         * the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;

        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
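/* Usage sketch (illustrative only): enabling watchdog support from an application. When the unit sets
 * WatchdogSec= (and thus WATCHDOG_USEC= is in the environment), the loop pings the service manager
 * automatically from process_watchdog() above, somewhere between 1/2 and 3/4 of the watchdog interval.
 * example_watchdog is a hypothetical name; the return-value comment reflects my reading of
 * sd_event_set_watchdog() below. */
#if 0
#include <systemd/sd-event.h>

int example_watchdog(sd_event *e) {
        /* Returns > 0 if the watchdog logic is actually in effect. */
        return sd_event_set_watchdog(e, 1);
}
#endif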
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and readd a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
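/* Usage sketch (illustrative only): a single loop iteration built from the three calls above. This
 * prepare/wait/dispatch decomposition is what sd_event_run() performs internally, and splitting the steps
 * like this is how sd-event is embedded into foreign event loops. example_single_iteration is a
 * hypothetical name. */
#if 0
#include <systemd/sd-event.h>

int example_single_iteration(sd_event *e, uint64_t timeout) {
        int r;

        r = sd_event_prepare(e);        /* 0 means: nothing pending yet */
        if (r == 0)
                r = sd_event_wait(e, timeout);
        if (r > 0)
                r = sd_event_dispatch(e);

        return r;
}
#endif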
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }

        log_debug("Event loop iterations: %.*s", o, b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
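/* Usage sketch (illustrative only): a complete minimal program around the loop. sd_event_loop() runs until
 * some callback calls sd_event_exit(), whose exit code becomes the return value. on_sigint is a
 * hypothetical handler; error handling is deliberately terse. */
#if 0
#include <signal.h>
#include <systemd/sd-event.h>

static int on_sigint(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int main(void) {
        sd_event *e = NULL;
        sigset_t ss;
        int r;

        sigemptyset(&ss);
        sigaddset(&ss, SIGINT);
        sigprocmask(SIG_BLOCK, &ss, NULL);

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        r = sd_event_add_signal(e, NULL, SIGINT, on_sigint, NULL);
        if (r < 0)
                return 1;

        r = sd_event_loop(e);
        sd_event_unref(e);
        return r < 0;
}
#endif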
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
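/* Usage sketch (illustrative only): inside a callback, sd_event_now() returns the timestamp cached when the
 * current batch of wakeups was collected, which is cheaper and more consistent than repeated
 * clock_gettime() calls; sources dispatched in the same iteration observe the same value. on_defer is a
 * hypothetical handler name. */
#if 0
#include <time.h>
#include <systemd/sd-event.h>

static int on_defer(sd_event_source *s, void *userdata) {
        uint64_t t;

        (void) sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &t);
        /* use t ... */
        return 0;
}
#endif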
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 0;
}
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}
_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);

        *ret = s->destroy_callback;
        return !!s->destroy_callback;
}