/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/timerfd.h>

#include "alloc-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        SOURCE_INOTIFY,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
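/* Illustrative sketch (not part of the original file): the lookup helper
 * generated above is what debug messages throughout this file use to name a
 * source's type, e.g.:
 *
 *     log_debug("Disposing of source %s (type %s)",
 *               strna(s->description), event_source_type_to_string(s->type));
 *
 * The message text here is made up; only the helper call is real. */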
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        WAKEUP_INOTIFY_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        sd_event_destroy_t destroy_callback;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                        bool owned:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
                struct {
                        sd_event_inotify_handler_t callback;
                        uint32_t mask;
                        struct inode_data *inode_data;
                        LIST_FIELDS(sd_event_source, by_inode_data);
                } inotify;
        };
};
struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in. */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};
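/* Illustrative sketch of how the two queues are consulted (this mirrors
 * event_arm_timer() further below, it is not an additional API):
 *
 *     sd_event_source *a = prioq_peek(d->earliest);  // soonest permitted wakeup
 *     sd_event_source *b = prioq_peek(d->latest);    // tightest deadline
 *     usec_t t = sleep_between(e, a->time.next, time_event_source_latest(b));
 *
 * t is then programmed into the per-clock timerfd. */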
/* A structure listing all event sources currently watching a specific inode */
struct inode_data {
        /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
        ino_t ino;
        dev_t dev;

        /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
         * still rearrange the priority until then. We need the original inode to change the priority, as we have to
         * add a watch descriptor to the right inotify for the priority, which we can only do if we have a handle to
         * the original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see
         * below) of the sd-event object, so that it is efficient to close everything before entering the next event
         * loop iteration. */
        int fd;

        /* The inotify "watch descriptor" */
        int wd;

        /* The combined mask of all inotify watches on this inode we manage. This is also the mask that has
         * most recently been set on the watch descriptor. */
        uint32_t combined_mask;

        /* All event sources subscribed to this inode */
        LIST_HEAD(sd_event_source, event_sources);

        /* The inotify object we watch this inode with */
        struct inotify_data *inotify_data;

        /* A linked list of all inode data objects with fds to close (see above) */
        LIST_FIELDS(struct inode_data, to_close);
};
/* A structure encapsulating an inotify fd */
struct inotify_data {
        WakeupType wakeup;

        /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority
         * at a time. */
        int fd;
        int64_t priority;

        Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
        Hashmap *wd;     /* The inode_data structures keyed by the watch descriptor for each */

        /* The buffer we read inotify events into */
        union inotify_event_buffer buffer;
        size_t buffer_filled; /* fill level of the buffer */

        /* How many event sources are currently marked pending for this inotify. We won't read new events off the
         * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
         * the events locally if they can't be coalesced). */
        unsigned n_pending;

        /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
         * to make it efficient to figure out what inotify objects to process data on next. */
        LIST_FIELDS(struct inotify_data, buffered);
};
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;
        Prioq *exit;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};
static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);
static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
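/* Example: a source with .next = 1000000 (1s) and .accuracy = 250000 (250ms)
 * may be dispatched anywhere in [1.0s, 1.25s]. usec_add() saturates rather
 * than overflowing, so .next == USEC_INFINITY stays USEC_INFINITY. */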
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (time_event_source_latest(x) < time_event_source_latest(y))
                return -1;
        if (time_event_source_latest(x) > time_event_source_latest(y))
                return 1;

        return 0;
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        return mfree(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
static bool event_pid_changed(sd_event *e) {

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added) {
                d->fd = safe_close(d->fd);
                hashmap_remove(e->signal_data, &d->priority);
                free(d);
        }

        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, remove the object entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If the mask is now all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                safe_close(d->fd);
                free(d);
                return;
        }

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }
                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
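/* Illustrative usage sketch (not part of the original file); "my_io_handler"
 * and "listen_fd" are hypothetical names:
 *
 *     static int my_io_handler(sd_event_source *src, int fd, uint32_t revents, void *userdata) {
 *             // fd is readable here; read/accept, then return 0 (or < 0 to fail the loop)
 *             return 0;
 *     }
 *
 *     sd_event *e = NULL;
 *     sd_event_source *src = NULL;
 *     assert_se(sd_event_default(&e) >= 0);
 *     assert_se(sd_event_add_io(e, &src, listen_fd, EPOLLIN, my_io_handler, NULL) >= 0);
 */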
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
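/* Worked example (made-up number): if bootid.qwords[0] ^ bootid.qwords[1]
 * equals 123456789012345, then
 *
 *     e->perturb = 123456789012345 % USEC_PER_MINUTE   // USEC_PER_MINUTE == 60000000
 *                = 9012345;                            // ~9.01s into each minute
 *
 * so this host always prefers wakeups ~9.01s past each minute boundary,
 * stable across the whole boot. */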
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
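/* Illustrative usage sketch (not part of the original file): arm a one-shot
 * timer 5s from now on CLOCK_MONOTONIC with 100ms accuracy; "my_time_handler"
 * is a hypothetical name:
 *
 *     usec_t n = now(CLOCK_MONOTONIC);
 *     assert_se(sd_event_add_time(e, &src, CLOCK_MONOTONIC,
 *                                 usec_add(n, 5 * USEC_PER_SEC),
 *                                 100 * USEC_PER_MSEC,
 *                                 my_time_handler, NULL) >= 0);
 */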
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;

        return 0;
}
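/* Illustrative usage sketch (not part of the original file): the signal must
 * already be blocked in the calling thread, otherwise the sigismember() check
 * above fails with -EBUSY:
 *
 *     sigset_t ss;
 *     assert_se(sigemptyset(&ss) >= 0);
 *     assert_se(sigaddset(&ss, SIGTERM) >= 0);
 *     assert_se(pthread_sigmask(SIG_BLOCK, &ss, NULL) == 0);
 *     assert_se(sd_event_add_signal(e, &src, SIGTERM, NULL, NULL) >= 0);
 *
 * A NULL callback makes SIGTERM terminate the loop via signal_exit_callback(). */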
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                source_free(s);
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
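/* Illustrative usage sketch (not part of the original file): watch a forked
 * child for exit. Since the machinery above rides on a SIGCHLD signalfd,
 * SIGCHLD generally needs to be blocked before the child is created so the
 * event cannot be lost; "child_pid" and "my_child_handler" are hypothetical:
 *
 *     assert_se(sd_event_add_child(e, &src, child_pid, WEXITED,
 *                                  my_child_handler, NULL) >= 0);
 */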
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}
static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1; /* > 0 signals to the caller that a new object was created */
}
static int inode_data_compare(const void *a, const void *b) {
        const struct inode_data *x = a, *y = b;

        if (x->dev < y->dev)
                return -1;
        if (x->dev > y->dev)
                return 1;

        if (x->ino < y->ino)
                return -1;
        if (x->ino > y->ino)
                return 1;

        return 0;
}
static void inode_data_hash_func(const void *p, struct siphash *state) {
        const struct inode_data *d = p;

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}
const struct hash_ops inode_data_hash_ops = {
        .hash = inode_data_hash_func,
        .compare = inode_data_compare,
};
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}
static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}
static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1; /* > 0 signals to the caller that a new object was created */
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
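/* Worked example: two sources watching the same inode, one with
 * IN_CLOSE_WRITE|IN_EXCL_UNLINK and one with IN_MOVED_TO, combine to
 * IN_CLOSE_WRITE|IN_MOVED_TO: the OR collects both event types, but
 * IN_EXCL_UNLINK is dropped because not *all* sources requested it (it is a
 * filter rather than an event type, so it is ANDed). */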
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd); /* use the inotify fd here, not the inode fd */
                return -EINVAL;
        }

        d->combined_mask = combined_mask;

        return 0;
}
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        sd_event_source *s;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                        (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                        (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                goto fail;
        rm_inotify = r > 0;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0)
                goto fail;
        rm_inode = r > 0;

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority
         * of the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        rm_inode = rm_inotify = false;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                goto fail;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(e, inode_data);

        if (rm_inotify)
                event_free_inotify_data(e, inotify_data);

        source_free(s);
        return r;
}
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}
_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back again */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
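/* Worked example (made-up numbers): a = 125.0s, b = 130.5s, and
 * e->perturb = 9012345 (~9.01s, as in the earlier example). The minute step
 * computes c = (130500000 / 60000000) * 60000000 + 9012345 = 129012345, i.e.
 * ~129.01s. Since a <= c < b, we wake at ~129.01s, the same spot within the
 * minute that every timer on this host converges on. Had c fallen outside
 * [a, b) we would have retried with the 10s, 1s and 250ms steps. */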
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return r;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
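/* Worked example: with EPOLLONESHOT, a source may first report EPOLLIN, be
 * re-armed, and then report EPOLLOUT before dispatch. ORing yields
 * s->io.revents == (EPOLLIN|EPOLLOUT), so the handler sees both conditions
 * instead of the second event overwriting the first. */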
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {

                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if SIGCHLD is enqueued
           behind the one we dequeue we wouldn't notice it, even
           though we might have higher priority children we care
           about. Hence, check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
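/* Editor's illustrative sketch, not part of the original source: a SIGTERM
 * source. The signal has to be blocked first so it reaches the per-priority
 * signalfd read above instead of being delivered asynchronously. Names are
 * hypothetical. */
_unused_ static int example_sigterm_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

_unused_ static int example_watch_sigterm(sd_event *e, sd_event_source **ret) {
        sigset_t mask;

        if (sigemptyset(&mask) < 0 ||
            sigaddset(&mask, SIGTERM) < 0 ||
            sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
                return -errno;

        return sd_event_add_signal(e, ret, SIGTERM, example_sigterm_handler, NULL);
}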
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this
                         * inotify object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                /* Nothing to dispatch for this event, drop it from the buffer. */
                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * inotify read buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
         * a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %.*s", o, b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}
_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}