/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/epoll.h>
#include <sys/inotify.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"
#include "util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        SOURCE_INOTIFY,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        WAKEUP_INOTIFY_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        signed int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        sd_event_destroy_t destroy_callback;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                        bool owned:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
                struct {
                        sd_event_inotify_handler_t callback;
                        uint32_t mask;
                        struct inode_data *inode_data;
                        LIST_FIELDS(sd_event_source, by_inode_data);
                } inotify;
        };
};

struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};

/* A structure listing all event sources currently watching a specific inode */
struct inode_data {
        /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
        ino_t ino;
        dev_t dev;

        /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
         * rearrange the priority still until then, as we need the original inode to change the priority as we need to
         * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the
         * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
         * the sd-event object, so that it is efficient to close everything, before entering the next event loop
         * iteration. */
        int fd;

        /* The inotify "watch descriptor" */
        int wd;

        /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
         * most recently been set on the watch descriptor. */
        uint32_t combined_mask;

        /* All event sources subscribed to this inode */
        LIST_HEAD(sd_event_source, event_sources);

        /* The inotify object we watch this inode with */
        struct inotify_data *inotify_data;

        /* A linked list of all inode data objects with fds to close (see above) */
        LIST_FIELDS(struct inode_data, to_close);
};

/* A structure encapsulating an inotify fd */
struct inotify_data {
        WakeupType wakeup;

        /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority
         * at a time. */

        int fd;
        int64_t priority;

        Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
        Hashmap *wd;     /* The inode_data structures keyed by the watch descriptor for each */

        /* The buffer we read inotify events into */
        union inotify_event_buffer buffer;
        size_t buffer_filled; /* fill level of the buffer */

        /* How many event sources are currently marked pending for this inotify. We won't read new events off the
         * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
         * the events locally if they can't be coalesced). */
        unsigned n_pending;

        /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
         * to make it efficient to figure out what inotify objects to process data on next. */
        LIST_FIELDS(struct inotify_data, buffered);
};

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;
        Prioq *exit;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};
static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        return mfree(e);
}
_public_ int sd_event_new(sd_event **ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
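/* Illustrative usage sketch (not part of this file): allocating a loop, arming
 * a one-shot timer against it and running until the callback asks for an exit.
 * "on_time" and "run" are hypothetical names; the API calls are the real ones
 * declared in sd-event.h.
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     static int run(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_new(&e);
 *             if (r < 0)
 *                     return r;
 *
 *             r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                   now(CLOCK_MONOTONIC) + USEC_PER_SEC,
 *                                   0, on_time, NULL);
 *             if (r >= 0)
 *                     r = sd_event_loop(e);
 *
 *             sd_event_unref(e);
 *             return r;
 *     }
 */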
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added) {
                d->fd = safe_close(d->fd);
                hashmap_remove(e->signal_data, &d->priority);
                free(d);
        }

        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way removes it. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                safe_close(d->fd);
                free(d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}

DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
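/* Illustrative sketch (not part of this file): watching the read end of a pipe
 * for readability. "on_io" and "pipe_fds" are hypothetical; EPOLLIN and the
 * API calls are real. The source is enabled SD_EVENT_ON by default and stays
 * registered in the epoll object until disabled or freed.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n;
 *
 *             n = read(fd, buf, sizeof(buf));
 *             return n < 0 ? -errno : 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, pipe_fds[0], EPOLLIN, on_io, NULL);
 */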
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
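/* Worked example of the perturbation above (made-up value): if
 * bootid.qwords[0] ^ bootid.qwords[1] == 0x1234567890abcdef, then e->perturb
 * is that value modulo USEC_PER_MINUTE, i.e. some fixed offset in [0, 60s).
 * Every wakeup on this machine that can be coalesced to minute granularity is
 * then placed at that same offset past the minute, while a machine with a
 * different boot ID lands on a different offset. */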
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
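/* Illustrative sketch (not part of this file): arming a timer 5s from now with
 * 100ms accuracy. "on_time" is a hypothetical handler. Time event sources
 * default to SD_EVENT_ONESHOT, so they fire once unless re-enabled or given a
 * new deadline with sd_event_source_set_time().
 *
 *     r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                           now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC,
 *                           100 * USEC_PER_MSEC, on_time, NULL);
 */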
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));
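
        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

/* Illustrative sketch (not part of this file): the signal must already be
 * blocked in the calling thread, otherwise this returns -EBUSY (see the
 * sigismember() check above).
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 *
 * Passing a NULL callback installs signal_exit_callback(), so SIGTERM then
 * simply exits the loop. */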
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
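/* Illustrative sketch (not part of this file): SIGCHLD must be blocked before
 * the child is forked, so the event loop can pick it up via its signalfd.
 * "on_child" is a hypothetical handler.
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGCHLD);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);
 *
 *     r = sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 *
 * The siginfo_t passed to the callback still shows the process as a zombie,
 * because process_child() below queries it with WNOWAIT; the child is only
 * reaped after the event source was dispatched. */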
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
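/* Illustrative sketch (not part of this file): defer sources start out
 * pending and SD_EVENT_ONESHOT, so the callback runs once on the next loop
 * iteration, after everything of higher priority was dispatched.
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             // Runs once; re-enable with sd_event_source_set_enabled()
 *             // if it should run again.
 *             return 0;
 *     }
 *
 *     r = sd_event_add_defer(e, NULL, on_defer, NULL);
 */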
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
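/* Illustrative sketch (not part of this file): exit sources only run once
 * sd_event_exit() was called, in priority order, which makes them a natural
 * place for teardown work. "on_exit_source" is a hypothetical handler.
 *
 *     static int on_exit_source(sd_event_source *s, void *userdata) {
 *             // release state before sd_event_loop() returns
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, NULL, on_exit_source, NULL);
 */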
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static int inode_data_compare(const void *a, const void *b) {
        const struct inode_data *x = a, *y = b;

        assert(x);
        assert(y);

        if (x->dev < y->dev)
                return -1;
        if (x->dev > y->dev)
                return 1;

        if (x->ino < y->ino)
                return -1;
        if (x->ino > y->ino)
                return 1;

        return 0;
}

static void inode_data_hash_func(const void *p, struct siphash *state) {
        const struct inode_data *d = p;

        assert(p);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

const struct hash_ops inode_data_hash_ops = {
        .hash = inode_data_hash_func,
        .compare = inode_data_compare,
};
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
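/* Worked example for inode_data_determine_mask() (hypothetical sources): if
 * one source watches with IN_CLOSE_WRITE|IN_EXCL_UNLINK and another with
 * IN_DELETE_SELF, the OR part yields IN_CLOSE_WRITE|IN_DELETE_SELF, but
 * IN_EXCL_UNLINK is dropped because the second source did not request it,
 * i.e. the AND across sources is false. Only if *all* sources set
 * IN_EXCL_UNLINK does the combined watch carry it. */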
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
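/* Illustrative sketch (not part of this file): watching a directory for
 * created/deleted entries. "on_inotify" and the path are hypothetical.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("inotify mask %" PRIx32 " on %s", ev->mask, ev->len > 0 ? ev->name : "?");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/run/foobar", IN_CREATE|IN_DELETE, on_inotify, NULL);
 *
 * Watches on the same inode are coalesced into a single kernel watch
 * descriptor, which is why IN_MASK_ADD is refused above. */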
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}

_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority,
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
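/* Illustrative note (not part of this file): lower priority values are
 * dispatched first, e.g.
 *
 *     r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_IMPORTANT);
 *
 * For inotify sources this works only until the first event loop iteration
 * after creation, because the O_PATH fd needed to re-realize the watch at the
 * new priority is closed at that point (see the -EOPNOTSUPP case above). */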
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }

                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
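/* Illustrative sketch (not part of this file): a oneshot source is switched
 * to SD_EVENT_OFF before its callback runs; re-enable it from within the
 * callback to get periodic behaviour, e.g. for a timer ("on_time" is
 * hypothetical):
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             (void) sd_event_source_set_time(s, usec + USEC_PER_SEC);
 *             (void) sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *             return 0;
 *     }
 */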
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
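/* Worked example for sleep_between() (made-up numbers): with a perturbation
 * of 17s and a window of a = 12:00:03, b = 12:01:30, the minute-granularity
 * candidate is (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + 17s = 12:01:17,
 * which lies inside [a, b] and is returned. If the window were too narrow for
 * the minute-granularity offset, the same calculation is retried at 10s, 1s
 * and 250ms granularity before falling back to b. */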
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return r;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just set some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
*e
, struct signal_data
*d
, uint32_t events
) {
2762 bool read_one
= false;
2767 assert_return(events
== EPOLLIN
, -EIO
);
2769 /* If there's a signal queued on this priority and SIGCHLD is
2770 on this priority too, then make sure to recheck the
2771 children we watch. This is because we only ever dequeue
2772 the first signal per priority, and if we dequeue one, and
2773 SIGCHLD might be enqueued later we wouldn't know, but we
2774 might have higher priority children we care about hence we
2775 need to check that explicitly. */
2777 if (sigismember(&d
->sigset
, SIGCHLD
))
2778 e
->need_process_child
= true;
2780 /* If there's already an event source pending for this
2781 * priority we don't read another */
2786 struct signalfd_siginfo si
;
2788 sd_event_source
*s
= NULL
;
2790 n
= read(d
->fd
, &si
, sizeof(si
));
2792 if (IN_SET(errno
, EAGAIN
, EINTR
))
2798 if (_unlikely_(n
!= sizeof(si
)))
2801 assert(SIGNAL_VALID(si
.ssi_signo
));
2805 if (e
->signal_sources
)
2806 s
= e
->signal_sources
[si
.ssi_signo
];
2812 s
->signal
.siginfo
= si
;
2815 r
= source_set_pending(s
, true);
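/* Illustrative sketch (not part of this file): delivering SIGTERM through the public
 * sd_event_add_signal() API. The signal must first be blocked so it arrives via the
 * signalfd read above instead of a classic handler; names are made up.
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *     // sd_event_add_signal(event, NULL, SIGTERM, on_sigterm, NULL);
 */
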
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}

static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}

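/* d->buffer is a union of a raw byte array and a struct inotify_event (see the
 * d->buffer.raw and d->buffer.ev accesses), so shifting the next record to offset 0
 * makes it readable through the structured view again. */
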
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                /* Nothing pending? Then drop this event from the read buffer, and process the next one. */
                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}

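/* Each record in an inotify read buffer is a struct inotify_event followed by
 * ev.len bytes of file name, which is the offsetof(struct inotify_event, name) +
 * len size arithmetic used while walking the buffer above. */
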
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * read buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
         * a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

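/* Return convention, as documented in sd_event_prepare(3): a positive value means
 * event sources became pending and sd_event_dispatch() should be called next, 0
 * means the loop was merely armed and the caller should sd_event_wait() first. */
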
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                r = 1;
        } else
                r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

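/* Note the rounding above: the usec-granularity timeout is rounded *up* to epoll's
 * msec granularity, so a small non-zero timeout is never truncated to a busy-looping
 * zero. */
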
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}

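/* Illustrative sketch (not part of this file): one manual iteration composed from the
 * three calls above, roughly what sd_event_run() does below:
 *
 *     r = sd_event_prepare(event);
 *     if (r == 0)                               // nothing pending yet, so wait
 *             r = sd_event_wait(event, (uint64_t) -1);
 *     if (r > 0)                                // something is pending now
 *             r = sd_event_dispatch(event);
 */
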
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %.*s", o, b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}

_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}

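/* Illustrative sketch (not part of this file): a minimal consumer of the API
 * implemented above, with error handling omitted for brevity.
 *
 *     #include <systemd/sd-event.h>
 *
 *     int main(void) {
 *             sd_event *event = NULL;
 *             sd_event_default(&event);
 *             // ... add io/time/signal/child sources here ...
 *             sd_event_loop(event);     // runs until some handler calls sd_event_exit()
 *             sd_event_unref(event);
 *             return 0;
 *     }
 */
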
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

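/* Illustrative sketch (not part of this file): the canonical pairing of sd_event_now()
 * with sd_event_add_time() for a relative timer, per the man pages; the handler name
 * is made up.
 *
 *     uint64_t usec;
 *     sd_event_now(event, CLOCK_MONOTONIC, &usec);
 *     // sd_event_add_time(event, &source, CLOCK_MONOTONIC,
 *     //                   usec + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 */
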
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {

                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

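/* sd_watchdog_enabled() above parses the $WATCHDOG_USEC/$WATCHDOG_PID environment
 * variables the service manager sets for WatchdogSec= units; arm_watchdog() then
 * schedules pings between 1/2 and 3/4 of that period, safely before the deadline. */
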
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
