/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/epoll.h>
#include <sys/inotify.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        SOURCE_INOTIFY,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        WAKEUP_INOTIFY_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        sd_event_destroy_t destroy_callback;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                        bool owned:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
                struct {
                        sd_event_inotify_handler_t callback;
                        uint32_t mask;
                        struct inode_data *inode_data;
                        LIST_FIELDS(sd_event_source, by_inode_data);
                } inotify;
        };
};
struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in. */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};

/* A structure listing all event sources currently watching a specific inode */
struct inode_data {
        /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
        ino_t ino;
        dev_t dev;

        /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
         * rearrange the priority still until then, as we need the original inode to change the priority as we need to
         * add a watch descriptor to the right inotify for the priority which we can only do if we have a handle to the
         * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
         * the sd-event object, so that it is efficient to close everything before entering the next event loop
         * iteration. */
        int fd;

        /* The inotify "watch descriptor" */
        int wd;

        /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
         * most recently been set on the watch descriptor. */
        uint32_t combined_mask;

        /* All event sources subscribed to this inode */
        LIST_HEAD(sd_event_source, event_sources);

        /* The inotify object we watch this inode with */
        struct inotify_data *inotify_data;

        /* A linked list of all inode data objects with fds to close (see above) */
        LIST_FIELDS(struct inode_data, to_close);
};
/* A structure encapsulating an inotify fd */
struct inotify_data {
        WakeupType wakeup;

        /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
         * a time. */

        int fd;
        int64_t priority;

        Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
        Hashmap *wd;     /* The inode_data structures keyed by the watch descriptor for each */

        /* The buffer we read inotify events into */
        union inotify_event_buffer buffer;
        size_t buffer_filled; /* fill level of the buffer */

        /* How many event sources are currently marked pending for this inotify. We won't read new events off the
         * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
         * the events locally if they can't be coalesced). */
        unsigned n_pending;

        /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
         * to make it efficient to figure out what inotify objects to process data on next. */
        LIST_FIELDS(struct inotify_data, buffered);
};
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;
static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
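/* Worked example of the comparator contract above (illustrative only, not part of the
 * original file): given two pending sources A (enabled, priority 0, pending_iteration 7)
 * and B (enabled, priority -10, pending_iteration 3), pending_prioq_compare(A, B) returns 1
 * because -10 < 0, so B sits at the top of the prioq and is dispatched first. Only when
 * priorities tie does the older pending_iteration win. */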
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (time_event_source_latest(x) < time_event_source_latest(y))
                return -1;
        if (time_event_source_latest(x) > time_event_source_latest(y))
                return 1;

        return 0;
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
_public_ int sd_event_new(sd_event **ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
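/* Usage sketch (illustrative only, not part of this file; error handling abbreviated):
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *     ...add sources, run the loop...
 *     sd_event_unref(e);
 *
 * Most callers use sd_event_default() instead, which caches one loop per thread via the
 * default_event pointer above. */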
_public_ sd_event* sd_event_ref(sd_event *e) {
        if (!e)
                return NULL;

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}

_public_ sd_event* sd_event_unref(sd_event *e) {
        if (!e)
                return NULL;

        assert(e->n_ref >= 1);
        e->n_ref--;

        if (e->n_ref <= 0)
                event_free(e);

        return NULL;
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added) {
                d->fd = safe_close(d->fd);
                hashmap_remove(e->signal_data, &d->priority);
                free(d);
        }

        return r;
}
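/* Illustration (derived from the code above, not part of the original file): if SIGTERM is
 * watched at priority -100 and SIGUSR1 at the default priority 0, two signal_data objects —
 * and hence two signalfds — exist, keyed by 64-bit priority in e->signal_data. Watching
 * SIGUSR2 at priority -100 as well would merely re-run signalfd() on the existing -100 fd
 * with the enlarged mask, not create a third fd. */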
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way we can get rid of the signalfd for it. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                safe_close(d->fd);
                free(d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }
                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
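/* Usage sketch (illustrative only, not part of this file; "on_readable" is a hypothetical
 * callback):
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[4096];
 *             ssize_t n = read(fd, buf, sizeof(buf));
 *             return n < 0 ? -errno : 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, fd, EPOLLIN, on_readable, NULL);
 *
 * Note that passing ret=NULL creates a "floating" source owned by the loop itself. */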
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int fd, r;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
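/* Usage sketch (illustrative only; "on_timer" is a hypothetical callback): arm a one-shot
 * timer 5s from now on CLOCK_MONOTONIC. An accuracy of 0 selects DEFAULT_ACCURACY_USEC,
 * i.e. up to 250ms of coalescing slack:
 *
 *     r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                           usec_add(now(CLOCK_MONOTONIC), 5 * USEC_PER_SEC),
 *                           0, on_timer, NULL);
 */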
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;

        return 0;
}
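/* Usage sketch (illustrative only): the signal must already be blocked in all threads,
 * otherwise the pthread_sigmask() check above makes this return -EBUSY:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 *
 * A NULL callback installs signal_exit_callback(), turning the signal into a loop exit. */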
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                source_free(s);
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
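/* Usage sketch (illustrative only; "on_child" is a hypothetical callback). WEXITED requests
 * exit notification; the child is left as a zombie until the callback has run, thanks to the
 * WNOWAIT logic in process_child() below:
 *
 *     r = sd_event_add_child(e, &source, child_pid, WEXITED, on_child, NULL);
 */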
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }

        free(d);
}
static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 0;
}
static int inode_data_compare(const void *a, const void *b) {
        const struct inode_data *x = a, *y = b;

        assert(x);
        assert(y);

        if (x->dev < y->dev)
                return -1;
        if (x->dev > y->dev)
                return 1;

        if (x->ino < y->ino)
                return -1;
        if (x->ino > y->ino)
                return 1;

        return 0;
}

static void inode_data_hash_func(const void *p, struct siphash *state) {
        const struct inode_data *d = p;

        assert(p);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

const struct hash_ops inode_data_hash_ops = {
        .hash = inode_data_hash_func,
        .compare = inode_data_compare,
};
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}
static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}
static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 0;
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
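/* Worked example (illustrative only): two sources on the same inode, one with
 * IN_MODIFY|IN_EXCL_UNLINK and one with IN_ATTRIB, combine to IN_MODIFY|IN_ATTRIB —
 * IN_EXCL_UNLINK is dropped because it is ANDed, and the second source doesn't set it. */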
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        sd_event_source *s;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                goto fail;
        rm_inotify = true;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0)
                goto fail;
        rm_inode = true;

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        rm_inode = rm_inotify = false;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                goto fail;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);

        if (rm_inode)
                event_free_inode_data(e, inode_data);

        if (rm_inotify)
                event_free_inotify_data(e, inotify_data);

        return r;
}
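/* Usage sketch (illustrative only; "on_inotify" is a hypothetical callback):
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("mask=%" PRIx32 " name=%s", ev->mask, ev->len > 0 ? ev->name : "-");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, &source, "/etc/hosts", IN_MODIFY|IN_ATTRIB, on_inotify, NULL);
 *
 * Watches on the same inode share one kernel watch descriptor per priority; the mask is
 * combined via inode_data_determine_mask() above. */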
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}

_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = true;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = true;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
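/* Worked example (illustrative only): with a = 62.0s, b = 64.5s and e->perturb = 3.2s, the
 * first candidate is (64.5s / 1min) * 1min + 3.2s = 63.2s. That lies within [a, b], so we
 * wake at 63.2s — and every machine whose window covers 63.2s and that derives the same
 * perturbation from its boot ID wakes at the same instant. Only if 63.2s fell outside the
 * window would we retry on the 10s, 1s and 250ms grids, finally falling back to b itself. */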
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one, and
           SIGCHLD might be enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                /* No event source was interested in this event, drop it */
                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}
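
/* For reference, the record walk above as a standalone sketch (hypothetical
 * reader code, not part of this file). inotify delivers variable-size
 * records: a fixed header followed by ev->len bytes of name, so the cursor
 * advances by offsetof(struct inotify_event, name) + ev->len per record:
 *
 *     #include <stddef.h>
 *     #include <sys/inotify.h>
 *
 *     static void walk_events(const char *buf, size_t n) {
 *             size_t off = 0;
 *
 *             while (n - off >= offsetof(struct inotify_event, name)) {
 *                     const struct inotify_event *ev = (const struct inotify_event*) (buf + off);
 *
 *                     if (n - off < offsetof(struct inotify_event, name) + ev->len)
 *                             break;  // truncated record, stop
 *
 *                     // ev->wd, ev->mask and (ev->len > 0 ? ev->name : "") describe one event
 *                     off += offsetof(struct inotify_event, name) + ev->len;
 *             }
 *     }
 *
 * (buf must be suitably aligned for struct inotify_event, which the union
 * read buffer used here guarantees.) */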
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from
                 * the inotify event queue buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
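
/* In other words: with watchdog_last = L and watchdog_period = P, the next
 * ping is scheduled somewhere in the window [L + P/2, L + 3P/4]. For example,
 * with a 20s watchdog period a ping sent at t=0 is followed by one between
 * t=10s and t=15s, comfortably before the t=20s deadline, while
 * sleep_between() remains free to coalesce the wakeup with other timers
 * inside that window. */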
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
         * a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
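
/* sd_event_prepare(), sd_event_wait() and sd_event_dispatch() together allow
 * driving a single iteration by hand, which is also what sd_event_run() does
 * internally. A minimal sketch (hypothetical caller code, not part of this
 * file):
 *
 *     int run_one_iteration(sd_event *e, uint64_t timeout) {
 *             int r;
 *
 *             r = sd_event_prepare(e);               // 0: need to wait, > 0: already pending
 *             if (r == 0)
 *                     r = sd_event_wait(e, timeout); // 0: timed out, > 0: something pending
 *             if (r > 0)
 *                     r = sd_event_dispatch(e);      // dispatch one event source
 *
 *             return r;
 *     }
 */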
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %.*s", o, b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
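
/* The delay histogram fed above is only maintained when profiling was enabled
 * at allocation time (e->profile_delays, set from the SD_EVENT_PROFILE_DELAYS
 * environment variable when the loop is allocated). Each bucket l counts
 * iterations whose inter-run latency fell into [2^l, 2^(l+1)) microseconds,
 * per u64log2() above. */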
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
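
/* A minimal end-to-end usage sketch of the public API implemented here
 * (hypothetical caller code, not part of this file; error handling trimmed):
 *
 *     #include <systemd/sd-event.h>
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             // do the one-shot work, then ask the loop to finish
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *
 *             if (sd_event_default(&e) < 0)
 *                     return 1;
 *             if (sd_event_add_defer(e, NULL, on_defer, NULL) < 0)
 *                     return 1;
 *
 *             int r = sd_event_loop(e);  // runs until sd_event_exit()
 *             sd_event_unref(e);
 *             return r < 0;
 *     }
 */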
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
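
/* The fd returned above makes it possible to embed this loop into a foreign
 * main loop: wait for the fd to become readable, then let sd-event process
 * whatever is ready without blocking. Sketch (hypothetical caller code):
 *
 *     struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };
 *
 *     for (;;) {
 *             if (poll(&p, 1, -1) < 0)
 *                     break;
 *             if (sd_event_run(e, 0) < 0)  // timeout 0: don't block here
 *                     break;
 *     }
 */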
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
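
/* sd_event_exit() is typically called from within a handler; the loop then
 * runs the exit sources (see dispatch_exit() above) and sd_event_loop()
 * returns the given code. Sketch (hypothetical caller code), assuming SIGINT
 * was blocked with sigprocmask() before being added:
 *
 *     static int on_sigint(sd_event_source *s,
 *                          const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     ...
 *     r = sd_event_add_signal(e, NULL, SIGINT, on_sigint, NULL);
 */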
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
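
/* The cached timestamp returned above is the idiomatic base for relative
 * timers: it is stable for the whole iteration and falls back to now()
 * before the first one. A "fire in 5s" sketch (hypothetical caller code;
 * on_time is a user-supplied sd_event_time_handler_t):
 *
 *     uint64_t usec;
 *
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                           usec + 5 * USEC_PER_SEC, // absolute deadline
 *                           0,                       // 0: default accuracy
 *                           on_time, NULL);
 */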
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
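
/* Usage note (illustrative, not part of this file): a service started with
 * WatchdogSec= set (i.e. with $WATCHDOG_USEC/$WATCHDOG_PID in its
 * environment) only needs a single call:
 *
 *     r = sd_event_set_watchdog(e, true);
 *
 * and the loop keeps pinging the service manager for as long as it keeps
 * iterating. If no watchdog was requested, sd_watchdog_enabled() above
 * returns 0 and the call is a no-op returning 0. */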
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}
_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
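
/* The destroy callback ties userdata lifetime to the event source. Sketch
 * (hypothetical caller code; struct ctx and on_io are user-supplied):
 *
 *     struct ctx *c = new0(struct ctx, 1);
 *     if (!c)
 *             return -ENOMEM;
 *
 *     r = sd_event_add_io(e, &s, fd, EPOLLIN, on_io, c);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_source_set_destroy_callback(s, free);  // free(c) when s goes away
 */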