/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/timerfd.h>

#include "sd-daemon.h"
#include "sd-messages.h"

#include "alloc-util.h"
#include "errno-util.h"
#include "event-source.h"
#include "format-util.h"
#include "glyph-util.h"
#include "hexdecoct.h"
#include "logarithm.h"
#include "memory-util.h"
#include "missing_magic.h"
#include "missing_wait.h"
#include "origin-id.h"
#include "path-util.h"
#include "pidfd-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "siphash24.h"
#include "socket-util.h"
#include "stat-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
static bool EVENT_SOURCE_WATCH_PIDFD(const sd_event_source *s) {
        /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
        return s->type == SOURCE_CHILD &&
               s->child.options == WEXITED;
}

static bool event_source_is_online(sd_event_source *s) {
        return s->enabled != SD_EVENT_OFF && !s->ratelimited;
}

static bool event_source_is_offline(sd_event_source *s) {
        return s->enabled == SD_EVENT_OFF || s->ratelimited;
}
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
        [SOURCE_MEMORY_PRESSURE] = "memory-pressure",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t)                 \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM)

#define EVENT_SOURCE_CAN_RATE_LIMIT(t)          \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM,      \
               SOURCE_MEMORY_PRESSURE)

/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
 * Time sources and ratelimited sources can be passed, so effectively this is the same as the
 * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)
struct sd_event {
        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_online_child_sources;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close_list);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, buffered_inotify_data_list);

        /* A list of memory pressure event sources that still need their subscription string written */
        LIST_HEAD(sd_event_source, memory_pressure_write_list);

        triple_timestamp timestamp;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        struct epoll_event *event_queue;

        LIST_HEAD(sd_event_source, sources);

        sd_event_source *sigint_event_source, *sigterm_event_source;

        usec_t last_run_usec, last_log_usec;
        unsigned delays[sizeof(usec_t) * 8];
};

DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_event, event);

static thread_local sd_event *default_event = NULL;
static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event* event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
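
/* Note: most public entry points below run event_resolve() via "assert_return(e = event_resolve(e), -ENOPKG)",
 * so passing the special SD_EVENT_DEFAULT pointer transparently selects the thread-local default loop stored
 * in default_event (and fails with -ENOPKG if no default loop exists on the calling thread). */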
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}
static usec_t time_event_source_next(const sd_event_source *s) {
        /* We have two kinds of event sources that have elapsation times associated with them: the actual
         * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
         * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are
         * looking at here. */

        if (s->ratelimited) { /* If rate-limited the next elapsation is when the ratelimit time window ends */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Otherwise this must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return s->time.next;

        return USEC_INFINITY;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        if (s->ratelimited) { /* For ratelimited stuff the earliest and the latest time shall actually be the
                               * same, as we should avoid adding additional inaccuracy on an inaccuracy time
                               * window */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return usec_add(s->time.next, s->time.accuracy);

        return USEC_INFINITY;
}

static bool event_source_timer_candidate(const sd_event_source *s) {
        /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending)
         * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */
        return !s->pending || s->ratelimited;
}
static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
        r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
        if (r != 0)
                return r;

        return CMP(time_func(x), time_func(y));
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_next);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_latest);
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}
static void free_clock_data(struct clock_data *d) {
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static sd_event* event_free(sd_event *e) {
        sd_event_source *s;

        e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}
_public_ int sd_event_new(sd_event **ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .watchdog_fd = -EBADF,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -EBADF,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -EBADF,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -EBADF,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -EBADF,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -EBADF,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .origin_id = origin_id_query(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 %s 2^63 us will be logged every 5s.",
                          glyph(GLYPH_ELLIPSIS));
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
/* Define manually so we can add the origin check */
_public_ sd_event* sd_event_ref(sd_event *e) {
        if (!e)
                return NULL;
        if (event_origin_changed(e))
                return NULL;

        e->n_ref++;
        return e;
}

_public_ sd_event* sd_event_unref(sd_event *e) {
        if (!e)
                return NULL;
        if (event_origin_changed(e))
                return NULL;

        assert(e->n_ref > 0);
        if (--e->n_ref > 0)
                return NULL;

        return event_free(e);
}

#define PROTECT_EVENT(e)                                                \
        _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e);
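
/* A minimal usage sketch of the reference counting above (hypothetical caller dispatch_something()):
 * PROTECT_EVENT() pins the loop for the duration of a scope so that a callback dropping the last user
 * reference cannot free the sd_event object mid-dispatch:
 *
 *     static int dispatch_something(sd_event *e) {
 *             PROTECT_EVENT(e);           // holds a reference until the end of this scope
 *             ...                         // safe to run callbacks that may unref 'e'
 *             return 0;
 *     }
 */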
_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        int r;

        r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
        if (r < 0)
                log_debug_errno(r, "Failed to disable event source %p (%s): %m",
                                s, strna(s->description));

        return sd_event_source_unref(s);
}
static void source_io_unregister(sd_event_source *s) {
        assert(s->type == SOURCE_IO);

        if (event_origin_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s->type == SOURCE_CHILD);

        if (event_origin_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}
static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;

        return 0;
}
static void source_memory_pressure_unregister(sd_event_source *s) {
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (event_origin_changed(s->event))
                return;

        if (!s->memory_pressure.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->memory_pressure.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->memory_pressure.registered = false;
}
static int source_memory_pressure_register(sd_event_source *s, int enabled) {
        assert(s->type == SOURCE_MEMORY_PRESSURE);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = s->memory_pressure.write_buffer_size > 0 ? EPOLLOUT :
                          (s->memory_pressure.events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0)),
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->memory_pressure.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->memory_pressure.fd, &ev) < 0)
                return -errno;

        s->memory_pressure.registered = true;

        return 0;
}
static void source_memory_pressure_add_to_write_list(sd_event_source *s) {
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (s->memory_pressure.in_write_list)
                return;

        LIST_PREPEND(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
        s->memory_pressure.in_write_list = true;
}

static void source_memory_pressure_remove_from_write_list(sd_event_source *s) {
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (!s->memory_pressure.in_write_list)
                return;

        LIST_REMOVE(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
        s->memory_pressure.in_write_list = false;
}
static clockid_t event_source_type_to_clock(EventSourceType t) {
        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {
        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        int64_t priority;
        sigset_t ss_copy;
        int r;

        if (event_origin_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -EBADF,
                        .priority = priority,
                };

                r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd >= 0 ? d->fd : -1, /* the first arg must be -1 or a valid signalfd */
                     &ss_copy,
                     SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        event_free_signal_data(e, d);
        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, the signal data object is freed. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If all the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        if (event_origin_changed(e))
                return;

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_online_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            event_source_is_online(e->signal_sources[sig]))
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}
static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered
         * properly again. */

        if (s->ratelimited)
                d = &s->event->monotonic;
        else if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(d = event_get_clock_data(s->event, s->type));
        else
                return; /* no-op for an event source which is neither a timer nor ratelimited. */

        prioq_reshuffle(d->earliest, s, &s->earliest_index);
        prioq_reshuffle(d->latest, s, &s->latest_index);
        d->needs_rearm = true;
}
static void event_source_time_prioq_remove(
                sd_event_source *s,
                struct clock_data *d) {

        prioq_remove(d->earliest, s, &s->earliest_index);
        prioq_remove(d->latest, s, &s->latest_index);
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        d->needs_rearm = true;
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;
        int r;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                /* Only remove this event source from the time event source here if it is not ratelimited. If
                 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
                 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */

                if (!s->ratelimited) {
                        struct clock_data *d;
                        assert_se(d = event_get_clock_data(s->event, s->type));
                        event_source_time_prioq_remove(s, d);
                }

                break;

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                        if (s->signal.unblock) {
                                sigset_t new_ss;

                                if (sigemptyset(&new_ss) < 0)
                                        log_debug_errno(errno, "Failed to reset signal set, ignoring: %m");
                                else if (sigaddset(&new_ss, s->signal.sig) < 0)
                                        log_debug_errno(errno, "Failed to add signal %i to signal mask, ignoring: %m", s->signal.sig);
                                else {
                                        r = pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);
                                        if (r != 0)
                                                log_debug_errno(r, "Failed to unblock signal %i, ignoring: %m", s->signal.sig);
                                }
                        }
                }

                break;

        case SOURCE_CHILD:
                if (event_origin_changed(s->event))
                        s->child.process_owned = false;

                if (s->child.pid > 0) {
                        if (event_source_is_online(s)) {
                                assert(s->event->n_online_child_sources > 0);
                                s->event->n_online_child_sources--;
                        }

                        assert_se(hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid)));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
                         * continued to being watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        case SOURCE_MEMORY_PRESSURE:
                source_memory_pressure_remove_from_write_list(s);
                source_memory_pressure_unregister(s);
                break;

        default:
                assert_not_reached();
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        if (s->ratelimited)
                event_source_time_prioq_remove(s, &s->event->monotonic);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        sd_event_unref(event);
}
static sd_event_source* source_free(sd_event_source *s) {
        int r;

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {
                        assert(s->child.pid > 0);
                        assert(s->child.pidfd >= 0);

                        if (!s->child.exited) {
                                r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0));
                                if (r < 0 && r != -ESRCH)
                                        log_debug_errno(r, "Failed to kill process " PID_FMT ", ignoring: %m",
                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->type == SOURCE_MEMORY_PRESSURE) {
                s->memory_pressure.fd = safe_close(s->memory_pressure.fd);
                s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        return mfree(s);
}

DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source* source_new(sd_event *e, bool floating, EventSourceType type) {

        /* Let's allocate exactly what we need. Note that the difference of the smallest event source
         * structure to the largest is 144 bytes on x86-64 at the time of writing, i.e. more than two cache
         * lines. */
        static const size_t size_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
                [SOURCE_IO]                  = endoffsetof_field(sd_event_source, io),
                [SOURCE_TIME_REALTIME]       = endoffsetof_field(sd_event_source, time),
                [SOURCE_TIME_BOOTTIME]       = endoffsetof_field(sd_event_source, time),
                [SOURCE_TIME_MONOTONIC]      = endoffsetof_field(sd_event_source, time),
                [SOURCE_TIME_REALTIME_ALARM] = endoffsetof_field(sd_event_source, time),
                [SOURCE_TIME_BOOTTIME_ALARM] = endoffsetof_field(sd_event_source, time),
                [SOURCE_SIGNAL]              = endoffsetof_field(sd_event_source, signal),
                [SOURCE_CHILD]               = endoffsetof_field(sd_event_source, child),
                [SOURCE_DEFER]               = endoffsetof_field(sd_event_source, defer),
                [SOURCE_POST]                = endoffsetof_field(sd_event_source, post),
                [SOURCE_EXIT]                = endoffsetof_field(sd_event_source, exit),
                [SOURCE_INOTIFY]             = endoffsetof_field(sd_event_source, inotify),
                [SOURCE_MEMORY_PRESSURE]     = endoffsetof_field(sd_event_source, memory_pressure),
        };

        sd_event_source *s;

        assert(type < _SOURCE_EVENT_SOURCE_TYPE_MAX);
        assert(size_table[type] > 0);

        s = malloc0(size_table[type]);
        if (!s)
                return NULL;
        /* We use expand_to_usable() here to tell gcc that it should consider this an object of the full
         * size, even if we only allocate the initial part we need. */
        s = expand_to_usable(s, sizeof(sd_event_source));

        /* Note: we cannot use compound initialization here, because sizeof(sd_event_source) is likely larger
         * than what we allocated here. */
        s->event = e;
        s->type = type;
        s->floating = floating;

        s->pending_index = PRIOQ_IDX_NULL;
        s->prepare_index = PRIOQ_IDX_NULL;

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
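
/* A minimal usage sketch for the IO source API above (hypothetical handler name on_socket_ready(); the fd
 * and loop come from the caller):
 *
 *     static int on_socket_ready(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     ...; // read from fd
 *             }
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, fd, EPOLLIN, on_socket_ready, NULL);
 *
 * Passing a NULL callback installs io_exit_callback(), i.e. the event loop exits with the integer encoded
 * in userdata once the fd becomes ready. */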
static void initialize_perturb(sd_event *e) {
        sd_id128_t id = {};

        /* When we sleep for longer, we try to realign the wakeup to the same time within each
         * minute/second/250ms, so that events all across the system can be coalesced into a single CPU
         * wakeup. However, let's take some system-specific randomness for this value, so that in a network
         * of systems with synced clocks timer events are distributed a bit. Here, we calculate a
         * perturbation usec offset from the boot ID (or machine ID if failed, e.g. /proc is not mounted). */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&id) >= 0 || sd_id128_get_machine(&id) >= 0)
                e->perturb = (id.qwords[0] ^ id.qwords[1]) % USEC_PER_MINUTE;
        else
                e->perturb = 0; /* This is a super early process without /proc and /etc ?? */
}
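
/* Worked example: if the two 64-bit halves of the boot ID XOR to some value X, the perturbation is
 * X % USEC_PER_MINUTE, i.e. a fixed offset in the range [0, 60s) that stays stable for this boot.
 * Coalescible timers on this machine are then aligned to that offset, while machines with other boot IDs
 * land on different offsets, spreading wakeups across a fleet with synchronized clocks. */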
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -EBADF;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        r = event_setup_timer_fd(e, d, clock);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}
static int event_source_time_prioq_put(
                sd_event_source *s,
                struct clock_data *d) {

        int r;

        assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

        r = prioq_put(d->earliest, s, &s->earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->latest_index);
        if (r < 0) {
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
                s->earliest_index = PRIOQ_IDX_NULL;
                return r;
        }

        d->needs_rearm = true;
        return 0;
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != UINT64_MAX, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        assert_se(d = event_get_clock_data(e, type));

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = event_source_time_prioq_put(s, d);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}
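
/* A minimal usage sketch (hypothetical handler name on_timeout()): arm a one-shot timer 5s from now on the
 * monotonic clock, with default accuracy:
 *
 *     static int on_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
 *             ...;
 *             return 0;
 *     }
 *
 *     r = sd_event_add_time_relative(e, &source, CLOCK_MONOTONIC, 5 * USEC_PER_SEC, 0, on_timeout, NULL);
 *
 * An accuracy of 0 is replaced with DEFAULT_ACCURACY_USEC (250ms) in sd_event_add_time() above, and the
 * source is created in SD_EVENT_ONESHOT mode. */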
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t new_ss;
        bool block_it;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        /* Let's make sure our special flag stays outside of the valid signal range */
        assert_cc(_NSIG < SD_EVENT_SIGNAL_PROCMASK);

        if (sig & SD_EVENT_SIGNAL_PROCMASK) {
                sig &= ~SD_EVENT_SIGNAL_PROCMASK;
                assert_return(SIGNAL_VALID(sig), -EINVAL);

                block_it = true;
        } else {
                assert_return(SIGNAL_VALID(sig), -EINVAL);

                r = signal_is_blocked(sig);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;

                block_it = false;
        }

        if (!callback)
                callback = signal_exit_callback;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        if (block_it) {
                sigset_t old_ss;

                if (sigemptyset(&new_ss) < 0)
                        return -errno;

                if (sigaddset(&new_ss, sig) < 0)
                        return -errno;

                r = pthread_sigmask(SIG_BLOCK, &new_ss, &old_ss);
                if (r != 0)
                        return -r;

                r = sigismember(&old_ss, sig);
                if (r < 0)
                        return -errno;

                s->signal.unblock = !r;
        } else
                s->signal.unblock = false;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0) {
                if (s->signal.unblock)
                        (void) pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);

                return r;
        }

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
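
/* Callers that have not blocked the signal themselves can pass "sig | SD_EVENT_SIGNAL_PROCMASK" and the
 * loop will add the signal to the calling thread's signal mask (and undo that when the source goes away,
 * see s->signal.unblock above). A minimal sketch (hypothetical handler on_sigusr1()):
 *
 *     r = sd_event_add_signal(e, &source, SIGUSR1 | SD_EVENT_SIGNAL_PROCMASK, on_sigusr1, NULL);
 *
 * Without the flag, the signal must already be blocked on the calling thread or the call is refused. */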
static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        s->child.pidfd = pidfd_open(pid, 0);
        if (s->child.pidfd < 0)
                return -errno;

        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        /* These must be done after everything succeeds. */
        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
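
/* Note for users of the child APIs above: SIGCHLD must already be blocked on the calling thread before the
 * first child source is created (the signal_is_blocked(SIGCHLD) check only runs while
 * n_online_child_sources == 0), and the handler receives a const siginfo_t* describing the child's state
 * change. With options == WEXITED the source is serviced purely via its pidfd (see
 * EVENT_SOURCE_WATCH_PIDFD()); any other options additionally route through SIGCHLD processing. */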
_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.pidfd = pidfd;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
static int generic_exit_callback(sd_event_source *s, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
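
/* Exit sources registered above are not dispatched during normal operation; they are queued on e->exit and
 * run when the loop is exiting (i.e. after sd_event_exit() has been called), ordered by the
 * exit_prioq_compare() rules near the top of this file: enabled sources first, then ascending priority. */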
_public_ int sd_event_trim_memory(void) {
        int r;

        /* A default implementation of a memory pressure callback. Simply releases our own allocation caches
         * and glibc's. This is automatically used when people call sd_event_add_memory_pressure() with a
         * NULL callback parameter. */

        log_debug("Memory pressure event, trimming malloc() memory.");

        struct mallinfo2 before_mallinfo = mallinfo2();

        usec_t before_timestamp = now(CLOCK_MONOTONIC);
        hashmap_trim_pools();
        r = malloc_trim(0);
        usec_t after_timestamp = now(CLOCK_MONOTONIC);

        if (r > 0)
                log_debug("Successfully trimmed some memory.");
        else
                log_debug("Couldn't trim any memory.");

        usec_t period = after_timestamp - before_timestamp;

        struct mallinfo2 after_mallinfo = mallinfo2();
        size_t l = LESS_BY(before_mallinfo.hblkhd, after_mallinfo.hblkhd) +
                   LESS_BY(before_mallinfo.arena, after_mallinfo.arena);
        log_struct(LOG_DEBUG,
                   LOG_MESSAGE("Memory trimming took %s, returned %s to OS.",
                               FORMAT_TIMESPAN(period, 0),
                               FORMAT_BYTES(l)),
                   LOG_MESSAGE_ID(SD_MESSAGE_MEMORY_TRIM_STR),
                   LOG_ITEM("TRIMMED_BYTES=%zu", l),
                   LOG_ITEM("TRIMMED_USEC=" USEC_FMT, period));

        return 0;
}
static int memory_pressure_callback(sd_event_source *s, void *userdata) {
        sd_event_trim_memory();
        return 0;
}
_public_ int sd_event_add_memory_pressure(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_free_ char *w = NULL;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        _cleanup_close_ int path_fd = -EBADF, fd = -EBADF;
        _cleanup_free_ void *write_buffer = NULL;
        const char *watch, *watch_fallback = NULL, *env;
        size_t write_buffer_size = 0;
        struct stat st;
        uint32_t events;
        bool locked;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!callback)
                callback = memory_pressure_callback;

        s = source_new(e, !ret, SOURCE_MEMORY_PRESSURE);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->memory_pressure.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;
        s->memory_pressure.fd = -EBADF;

        env = secure_getenv("MEMORY_PRESSURE_WATCH");
        if (env) {
                if (isempty(env) || path_equal(env, "/dev/null"))
                        return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
                                               "Memory pressure logic is explicitly disabled via $MEMORY_PRESSURE_WATCH.");

                if (!path_is_absolute(env) || !path_is_normalized(env))
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                               "$MEMORY_PRESSURE_WATCH set to invalid path: %s", env);

                watch = env;

                env = secure_getenv("MEMORY_PRESSURE_WRITE");
                if (env) {
                        r = unbase64mem(env, &write_buffer, &write_buffer_size);
                        if (r < 0)
                                return r;
                }

                locked = true;
        } else {

                r = is_pressure_supported();
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EOPNOTSUPP;

                /* By default we want to watch memory pressure on the local cgroup, but we'll fall back on
                 * the system wide pressure if for some reason we cannot (which could be: memory controller
                 * not delegated to us, or PSI simply not available in the kernel). On legacy cgroupv1 we'll
                 * only use the system-wide logic. */
                r = cg_all_unified();
                if (r < 0)
                        return r;
                if (r == 0)
                        watch = "/proc/pressure/memory";
                else {
                        _cleanup_free_ char *cg = NULL;

                        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cg);
                        if (r < 0)
                                return r;

                        w = path_join("/sys/fs/cgroup", cg, "memory.pressure");
                        if (!w)
                                return -ENOMEM;

                        watch = w;
                        watch_fallback = "/proc/pressure/memory";
                }

                /* Android uses three levels in its userspace low memory killer logic:
                 *     some 70000 1000000
                 *     some 100000 1000000
                 *     full 70000 1000000
                 *
                 * GNOME's low memory monitor uses:
                 *     some 70000 1000000
                 *     some 100000 1000000
                 *     full 100000 1000000
                 *
                 * We'll default to the middle level that both agree on. Except we do it on a 2s window
                 * (i.e. 200ms per 2s, rather than 100ms per 1s), because that's the window duration the
                 * kernel will allow us to do unprivileged, also in the future. */
                if (asprintf((char**) &write_buffer,
                             "%s " USEC_FMT " " USEC_FMT,
                             MEMORY_PRESSURE_DEFAULT_TYPE,
                             MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC,
                             MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0)
                        return -ENOMEM;

                write_buffer_size = strlen(write_buffer) + 1;
                locked = false;
        }

        path_fd = open(watch, O_PATH|O_CLOEXEC);
        if (path_fd < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* We got ENOENT. Three options now: try the fallback if we have one, or return the error as
                 * is (if based on user/env config), or return -EOPNOTSUPP (because we picked the path, and
                 * the PSI service apparently is not supported) */
                if (!watch_fallback)
                        return locked ? -ENOENT : -EOPNOTSUPP;

                path_fd = open(watch_fallback, O_PATH|O_CLOEXEC);
                if (path_fd < 0) {
                        if (errno == ENOENT) /* PSI is not available in the kernel even under the fallback path? */
                                return -EOPNOTSUPP;
                        return -errno;
                }
        }

        if (fstat(path_fd, &st) < 0)
                return -errno;

        if (S_ISSOCK(st.st_mode)) {
                fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
                if (fd < 0)
                        return -errno;

                r = connect_unix_path(fd, path_fd, NULL);
                if (r < 0)
                        return r;

                events = EPOLLIN;

        } else if (S_ISREG(st.st_mode) || S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) {
                fd = fd_reopen(path_fd, (write_buffer_size > 0 ? O_RDWR : O_RDONLY) |O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
                if (fd < 0)
                        return fd;

                if (S_ISREG(st.st_mode)) {
                        struct statfs sfs;

                        /* If this is a regular file validate this is a procfs or cgroupfs file, where we look for EPOLLPRI */

                        if (fstatfs(fd, &sfs) < 0)
                                return -errno;

                        if (!is_fs_type(&sfs, PROC_SUPER_MAGIC) &&
                            !is_fs_type(&sfs, CGROUP2_SUPER_MAGIC))
                                return -ENOTTY;

                        events = EPOLLPRI;
                } else
                        /* For fifos and char devices just watch for EPOLLIN */
                        events = EPOLLIN;

        } else if (S_ISDIR(st.st_mode))
                return -EISDIR;
        else
                return -EBADF;

        s->memory_pressure.fd = TAKE_FD(fd);
        s->memory_pressure.write_buffer = TAKE_PTR(write_buffer);
        s->memory_pressure.write_buffer_size = write_buffer_size;
        s->memory_pressure.events = events;
        s->memory_pressure.locked = locked;

        /* So here's the thing: if we are talking to PSI we need to write the watch string before adding the
         * fd to epoll (if we ignore this, then the watch won't work). Hence we'll not actually register the
         * fd with the epoll right-away. Instead, we just add the event source to a list of memory pressure
         * event sources on which writes must be executed before the first event loop iteration is
         * executed. (We could also write the data here, right away, but we want to give the caller the
         * freedom to call sd_event_source_set_memory_pressure_type() and
         * sd_event_source_set_memory_pressure_rate() before we write it.) */

        if (s->memory_pressure.write_buffer_size > 0)
                source_memory_pressure_add_to_write_list(s);

        r = source_memory_pressure_register(s, s->enabled);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
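
/* The defaults assembled above amount to a PSI trigger line of the form "some <threshold-usec> <window-usec>"
 * (200ms of stall within a 2s window, per the comment above), which gets written to memory.pressure or
 * /proc/pressure/memory before the fd is registered with the epoll. Setting $MEMORY_PRESSURE_WATCH overrides
 * the watch file, and $MEMORY_PRESSURE_WRITE may carry a base64-encoded replacement trigger string (see the
 * unbase64mem() call above). */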
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (!event_origin_changed(e) &&
            epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

        safe_close(d->fd);
        free(d);
}
static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -EBADF;
        struct inotify_data *d;
        int r;

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 0;
}
static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        siphash24_compress_typesafe(d->dev, state);
        siphash24_compress_typesafe(d->ino, state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0 && !event_origin_changed(e)) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d->path);
        free(d);
}
static void event_gc_inotify_data(
                sd_event *e,
                struct inotify_data *d) {

        /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch
         * any inode with it anymore, which in turn happens if no event source of this priority is interested
         * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC
         * (under the expectation that the GC is called again once the counter is decremented). */

        if (!d)
                return;

        if (!hashmap_isempty(d->inodes))
                return;

        if (d->n_busy > 0)
                return;

        event_free_inotify_data(e, d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        event_gc_inotify_data(e, inotify_data);
}
static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -EBADF,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 0;
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
         * events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}
static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
static int event_add_inotify_fd_internal(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                bool donate,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        _cleanup_close_ int donated_fd = donate ? fd : -EBADF;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!callback)
                callback = inotify_exit_callback;

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_gc_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                if (donated_fd >= 0)
                        inode_data->fd = TAKE_FD(donated_fd);
                else {
                        inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
                        if (inode_data->fd < 0) {
                                r = -errno;
                                event_gc_inode_data(e, inode_data);
                                return r;
                        }
                }

                LIST_PREPEND(to_close, e->inode_data_to_close_list, inode_data);

                _cleanup_free_ char *path = NULL;
                r = fd_get_path(inode_data->fd, &path);
                if (r < 0 && r != -ENOSYS) { /* The path is optional, hence ignore -ENOSYS. */
                        event_gc_inode_data(e, inode_data);
                        return r;
                }

                free_and_replace(inode_data->path, path);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
_public_ int sd_event_add_inotify_fd(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
}

_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */

        assert_return(path, -EINVAL);

        fd = open(path, O_PATH | O_CLOEXEC |
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0) |
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));

        r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);

        (void) sd_event_source_set_description(s, path);
}
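
/* Illustrative sketch (not part of this file): a minimal caller of the public
 * sd_event_add_inotify() API above, watching /tmp for newly created files. The program
 * name and the watched path are arbitrary example choices. */
#if 0
#include <stdio.h>
#include <sys/inotify.h>
#include <systemd/sd-event.h>

static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        if (ev->len > 0)
                printf("saw: %s\n", ev->name);
        return 0;
}

int main(void) {
        sd_event *e = NULL;
        sd_event_source *src = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        /* IN_ONLYDIR maps to O_DIRECTORY in the open() call above. */
        r = sd_event_add_inotify(e, &src, "/tmp", IN_CREATE | IN_MOVED_TO | IN_ONLYDIR, on_inotify, NULL);
        if (r < 0)
                return 1;

        r = sd_event_loop(e);

        sd_event_source_unref(src);
        sd_event_unref(e);
        return r < 0;
}
#endif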
static sd_event_source* event_source_free(sd_event_source *s) {

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        source_disconnect(s);
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);

        if (!s->description)

        *ret = s->description;
}

_public_ sd_event* sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);
        assert_return(!event_origin_changed(s->event), NULL);
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(s->event), -ECHILD);
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        saved_fd = s->io.fd;

        assert(event_source_is_offline(s) == !s->io.registered);

        if (s->io.registered) {
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);

                        s->io.fd = saved_fd;
                        s->io.registered = true;

                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);

        safe_close(saved_fd);
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        *ret = s->io.events;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))

        r = source_set_pending(s, false);

        if (event_source_is_online(s)) {
                r = source_io_register(s, s->enabled, events);

        s->io.events = events;
}
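
/* Illustrative sketch (not part of this file): a caller that only subscribes to EPOLLOUT
 * while it actually has something queued for writing, relying on the short-cut above that
 * skips re-registration when the mask is unchanged. "have_data_to_write" is a hypothetical
 * flag owned by the application. */
#if 0
#include <stdbool.h>
#include <stdint.h>
#include <sys/epoll.h>
#include <systemd/sd-event.h>

static int update_io_interest(sd_event_source *io_source, bool have_data_to_write) {
        uint32_t events = EPOLLIN;

        if (have_data_to_write)
                events |= EPOLLOUT;

        /* No-op if the mask didn't change (unless EPOLLET is set, see above). */
        return sd_event_source_set_io_events(io_source, events);
}
#endif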
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        *ret = s->io.revents;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (s->priority == priority)

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority,
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {

                        LIST_PREPEND(to_close, s->event->inode_data_to_close_list, new_inode_data);

                        _cleanup_free_ char *path = NULL;
                        r = fd_get_path(new_inode_data->fd, &path);
                        if (r < 0 && r != -ENOSYS)

                        free_and_replace(new_inode_data->path, path);

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);

                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);

                        s->priority = old->priority;

                event_unmask_signal_data(s->event, old, s->signal.sig);

        s->priority = priority;

        event_source_pp_prioq_reshuffle(s);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        event_free_inode_data(s->event, new_inode_data);

        event_free_inotify_data(s->event, new_inotify_data);
}
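
/* Illustrative sketch (not part of this file): bumping a source to a higher (i.e. numerically
 * lower) priority so it is dispatched before SD_EVENT_PRIORITY_NORMAL sources. For inotify
 * sources this only works until the first event loop iteration after creation, as explained
 * above. */
#if 0
#include <systemd/sd-event.h>

static int make_important(sd_event_source *s) {
        /* Lower values are dispatched first: IMPORTANT < NORMAL < IDLE. */
        return sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IMPORTANT);
}
#endif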
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {

        /* Quick mode: the event source doesn't exist and we only want to query boolean enablement state. */

        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        return s->enabled != SD_EVENT_OFF;
}
static int event_source_offline(
                sd_event_source *s,
                int enabled,
                bool ratelimited) {

        assert(enabled == SD_EVENT_OFF || ratelimited);

        /* Unset the pending flag when this event source is disabled */
        if (s->enabled != SD_EVENT_OFF &&
            enabled == SD_EVENT_OFF &&
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);

        was_offline = event_source_is_offline(s);
        s->enabled = enabled;
        s->ratelimited = ratelimited;

                source_io_unregister(s);

                event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                assert(s->event->n_online_child_sources > 0);
                s->event->n_online_child_sources--;

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        case SOURCE_MEMORY_PRESSURE:
                source_memory_pressure_unregister(s);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:

        case SOURCE_INOTIFY:

                assert_not_reached();

        /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
        event_source_time_prioq_reshuffle(s);
}
static int event_source_online(
                sd_event_source *s,
                int enabled,
                bool ratelimited) {

        assert(enabled != SD_EVENT_OFF || !ratelimited);

        /* Unset the pending flag when this event source is enabled */
        if (s->enabled == SD_EVENT_OFF &&
            enabled != SD_EVENT_OFF &&
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);

        /* Are we really ready for onlining? */
        if (enabled == SD_EVENT_OFF || ratelimited) {
                /* Nope, we are not ready for onlining, then just update the precise state and exit */
                s->enabled = enabled;
                s->ratelimited = ratelimited;

        was_online = event_source_is_online(s);

                r = source_io_register(s, enabled, s->io.events);

                r = event_make_signal_data(s->event, s->signal.sig, NULL);

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                        /* yes, we can rely on pidfd */

                        r = source_child_pidfd_register(s, enabled);

                        /* something other to watch for than WEXITED */

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);

                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                s->event->n_online_child_sources++;

        case SOURCE_MEMORY_PRESSURE:
                r = source_memory_pressure_register(s, enabled);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:

        case SOURCE_INOTIFY:

                assert_not_reached();

        s->enabled = enabled;
        s->ratelimited = ratelimited;

        /* Non-failing operations below */
        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
        event_source_time_prioq_reshuffle(s);
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {

        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);

        /* Quick mode: if the source doesn't exist, SD_EVENT_OFF is a noop. */
        if (m == SD_EVENT_OFF && !s)

        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m) /* No change? */

        if (m == SD_EVENT_OFF)
                r = event_source_offline(s, m, s->ratelimited);

                if (s->enabled != SD_EVENT_OFF) {
                        /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
                         * event source is already enabled after all. */

                r = event_source_online(s, m, s->ratelimited);

        event_source_pp_prioq_reshuffle(s);
}
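
/* Illustrative sketch (not part of this file): because SD_EVENT_OFF on a NULL source is
 * accepted as a no-op ("quick mode" above), and because turning sources off is also allowed
 * on a finished loop, tear-down paths can disable sources without extra checks. */
#if 0
#include <systemd/sd-event.h>

static void pause_source(sd_event_source *s) {
        /* Safe even if s is NULL or the event loop already finished. */
        (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
}
#endif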
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        *ret = s->time.next;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);

        s->time.next = usec;

        event_source_time_prioq_reshuffle(s);
}
_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (usec == USEC_INFINITY)
                return sd_event_source_set_time(s, USEC_INFINITY);

        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);

        usec = usec_add(t, usec);
        if (usec == USEC_INFINITY)

        return sd_event_source_set_time(s, usec);
}
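
/* Illustrative sketch (not part of this file): a repeating 5s timer built from a oneshot
 * time source that re-arms itself relative to the cached "now" of its clock via
 * sd_event_source_set_time_relative() from within its own callback. The 5s interval is an
 * arbitrary example value. */
#if 0
#include <stdint.h>
#include <time.h>
#include <systemd/sd-event.h>

static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
        /* Re-arm 5s from now, then switch the (already disabled) oneshot source back on. */
        (void) sd_event_source_set_time_relative(s, 5 * 1000 * 1000ULL);
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}

static int add_tick_timer(sd_event *e, sd_event_source **ret) {
        /* Accuracy 0 selects the default accuracy (DEFAULT_ACCURACY_USEC, i.e. 250ms). */
        return sd_event_add_time_relative(e, ret, CLOCK_MONOTONIC,
                                          5 * 1000 * 1000ULL, 0, on_tick, NULL);
}
#endif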
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        *ret = s->time.accuracy;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {

        assert_return(s, -EINVAL);
        assert_return(usec != UINT64_MAX, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);

        usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        event_source_time_prioq_reshuffle(s);
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        *ret = event_source_type_to_clock(s->type);
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        *ret = s->child.pid;
}

_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        return s->child.pidfd;
}
_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert(s->child.pidfd >= 0);

        /* If we already have seen indication the process exited refuse sending a signal early. */
        if (s->child.exited)

        assert(!s->child.waited);

        /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the structure here. */

        return RET_NERRNO(pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, flags));
}
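
/* Illustrative sketch (not part of this file): watching a forked child and asking it to
 * terminate via the pidfd-based signal helper above. Note that sd_event_add_child()
 * requires SIGCHLD to be blocked in the calling thread. */
#if 0
#include <signal.h>
#include <sys/wait.h>
#include <systemd/sd-event.h>

static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* The process is still a zombie here (WNOWAIT above); it is reaped right after
         * this callback returns. */
        return sd_event_exit(sd_event_source_get_event(s), si->si_status);
}

static int watch_and_stop_child(sd_event *e, pid_t pid, sd_event_source **ret) {
        int r;

        r = sd_event_add_child(e, ret, pid, WEXITED, on_child, NULL);
        if (r < 0)
                return r;

        /* Delivered through the pidfd, hence immune to PID recycling. */
        return sd_event_source_send_child_signal(*ret, SIGTERM, NULL, 0);
}
#endif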
_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);
        assert(s->child.pidfd >= 0);

        return s->child.pidfd_owned;
}

_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);
        assert(s->child.pidfd >= 0);

        s->child.pidfd_owned = own;
}

_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        return s->child.process_owned;
}

_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        s->child.process_owned = own;
}
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        *ret = s->inotify.mask;
}

_public_ int sd_event_source_get_inotify_path(sd_event_source *s, const char **ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (!s->inotify.inode_data)
                return -ESTALE; /* already disconnected. */

        if (!s->inotify.inode_data->path)
                return -ENOSYS; /* /proc was not mounted? */

        *ret = s->inotify.inode_data->path;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (s->prepare == callback)

        if (callback && s->prepare) {
                s->prepare = callback;

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

                r = prioq_put(s->event->prepare, s, &s->prepare_index);

                        prioq_remove(s->event->prepare, s, &s->prepare_index);
}
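
/* Illustrative sketch (not part of this file): a prepare callback runs right before the
 * loop polls; here it enables an IO source only while a hypothetical application queue
 * has data, avoiding spurious EPOLLOUT wakeups. "my_queue" and "my_queue_empty()" are
 * made-up names. */
#if 0
#include <stdbool.h>
#include <systemd/sd-event.h>

struct my_queue;
bool my_queue_empty(struct my_queue *q);

static int on_prepare(sd_event_source *io_source, void *userdata) {
        struct my_queue *q = userdata;

        return sd_event_source_set_enabled(io_source,
                                           my_queue_empty(q) ? SD_EVENT_OFF : SD_EVENT_ON);
}

/* Installed with: sd_event_source_set_prepare(io_source, on_prepare); */
#endif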
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);
        assert_return(!event_origin_changed(s->event), NULL);
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {

        assert_return(s, NULL);
        assert_return(!event_origin_changed(s->event), NULL);

        s->userdata = userdata;
}
static int event_source_enter_ratelimited(sd_event_source *s) {

        /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
         * the end of the rate limit time window, much as if it was a timer event source. */

                return 0; /* Already ratelimited, this is a NOP hence */

        /* Make sure we can install a CLOCK_MONOTONIC event further down. */
        r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);

        /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
         * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
         * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));

        /* Now, let's add the event source to the monotonic clock instead */
        r = event_source_time_prioq_put(s, &s->event->monotonic);

        /* And let's take the event source officially offline */
        r = event_source_offline(s, s->enabled, /* ratelimited= */ true);

                event_source_time_prioq_remove(s, &s->event->monotonic);

        event_source_pp_prioq_reshuffle(s);

        log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));

        /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
         * space for it should already be allocated. */
        if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);
}
static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {

        if (!s->ratelimited)

        /* Let's take the event source out of the monotonic prioq first. */
        event_source_time_prioq_remove(s, &s->event->monotonic);

        /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
        if (EVENT_SOURCE_IS_TIME(s->type)) {
                r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));

        /* Let's try to take it online again. */
        r = event_source_online(s, s->enabled, /* ratelimited= */ false);

                /* Do something roughly sensible when this failed: undo the two prioq ops above */
                if (EVENT_SOURCE_IS_TIME(s->type))
                        event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));

        event_source_pp_prioq_reshuffle(s);
        ratelimit_reset(&s->rate_limit);

        log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));

        if (run_callback && s->ratelimit_expire_callback) {
                s->dispatching = true;
                r = s->ratelimit_expire_callback(s, s->userdata);
                s->dispatching = false;

                        log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(s->event, r);

                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);

        /* Do something somewhat reasonable when we cannot move an event source out of ratelimited mode:
         * simply put it back in it, maybe we can then process it more successfully next iteration. */
        assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {

        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (_unlikely_(c < USEC_PER_MINUTE))

        c -= USEC_PER_MINUTE;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (_unlikely_(c < USEC_PER_SEC*10))

        c -= USEC_PER_SEC*10;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (_unlikely_(c < USEC_PER_SEC))

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (_unlikely_(c < USEC_PER_MSEC*250))

        c -= USEC_PER_MSEC*250;
}
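
/* Illustrative sketch (not part of this file): the coalescing rule above for a single
 * granularity step, using small numbers instead of microseconds. With a perturbation of 17,
 * a = 100 and b = 130, the 60-step candidate 137 is pulled back to 77 and rejected (it lies
 * before a), while the 10-step candidate 137 is pulled back to 127 and accepted. */
#if 0
static unsigned long long coalesce_step(unsigned long long a, unsigned long long b,
                                        unsigned long long step, unsigned long long perturb) {
        unsigned long long c = (b / step) * step + (perturb % step);

        if (c >= b) {
                if (c < step)
                        return b;       /* would underflow, give up on this granularity */
                c -= step;
        }

        return c >= a ? c : 0;          /* 0: no hit here, the caller tries a finer step */
}

/* coalesce_step(100, 130, 60, 17) == 0, coalesce_step(100, 130, 10, 17) == 127 */
#endif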
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;

        if (!d->needs_rearm)

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
        if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {

                if (d->next == USEC_INFINITY)

                if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)

                d->next = USEC_INFINITY;

        b = prioq_peek(d->latest);
        assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
        assert(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));

        assert_se(d->fd >= 0);

                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;

                timespec_store(&its.it_value, t);

        if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)

}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {

        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

                s->io.revents |= revents;

                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));

                if (ERRNO_IS_TRANSIENT(errno))

        if (_unlikely_(ss != sizeof(x)))

                *next = USEC_INFINITY;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        bool callback_invoked = false;

        s = prioq_peek(d->earliest);
        assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

        if (!s || time_event_source_next(s) > n)

        if (s->ratelimited) {
                /* This is an event source whose ratelimit window has ended. Let's turn it on
                 * again. */
                assert(s->ratelimited);

                r = event_source_leave_ratelimit(s, /* run_callback */ true);

                callback_invoked = true;

        if (s->enabled == SD_EVENT_OFF || s->pending)

        r = source_set_pending(s, true);

        event_source_time_prioq_reshuffle(s);

        return callback_invoked;
}
static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
        int64_t min_priority = threshold;
        bool something_new = false;

        assert(ret_min_priority);

        if (!e->need_process_child) {
                *ret_min_priority = min_priority;

        e->need_process_child = false;

        /* So, this is ugly. We iteratively invoke waitid() + WNOHANG with each child process we shall wait for,
         * instead of using P_ALL. This is because we only want to get child information of very specific
         * child processes, and not all of them. We might not have processed the SIGCHLD event
         * of a previous invocation and we don't want to maintain an unbounded *per-child* event queue,
         * hence we really don't want anything flushed out of the kernel's queue that we don't care
         * about. Since this is O(n) this means that if you have a lot of processes you probably want
         * to handle SIGCHLD yourself.
         *
         * We do not reap the children here (by using WNOWAIT), this is only done after the event
         * source is dispatched so that the callback still sees the process as a zombie. */

        HASHMAP_FOREACH(s, e->child_sources) {
                assert(s->type == SOURCE_CHILD);
                assert(s->child.pidfd >= 0);

                if (s->priority > threshold)

                if (event_source_is_offline(s))

                if (s->child.exited)

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        /* There's a usable pidfd known for this event source? Then don't waitid() for
                         * it here. */

                zero(s->child.siginfo);
                if (waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
                        return negative_errno();

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = SIGINFO_CODE_IS_DEAD(s->child.siginfo.si_code);

                                s->child.exited = true;
                        else if (s->child.options & WEXITED) {
                                /* If the child isn't dead then let's immediately remove the state change
                                 * from the queue, since there's no benefit in leaving it queued. */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));

                        r = source_set_pending(s, true);

                        something_new = true;
                        min_priority = MIN(min_priority, s->priority);

        *ret_min_priority = min_priority;
        return something_new;
}
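
/* Illustrative sketch (not part of this file): the comment above suggests handling SIGCHLD
 * yourself when watching many processes. Rough approach, assuming no sd_event child sources
 * are in use: block SIGCHLD, add one signal source, and drain all exited children per
 * wakeup. */
#if 0
#include <errno.h>
#include <signal.h>
#include <sys/signalfd.h>
#include <sys/wait.h>
#include <systemd/sd-event.h>

static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        for (;;) {
                siginfo_t info = {};

                /* WNOHANG: reap everything that already exited, then stop. */
                if (waitid(P_ALL, 0, &info, WEXITED | WNOHANG) < 0 || info.si_pid == 0)
                        break;

                /* Look up info.si_pid in an application-level table here. */
        }
        return 0;
}

static int setup_sigchld(sd_event *e) {
        sigset_t mask;

        /* sd_event_add_signal() requires the signal to be blocked. */
        sigemptyset(&mask);
        sigaddset(&mask, SIGCHLD);
        if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
                return -errno;

        return sd_event_add_signal(e, NULL, SIGCHLD, on_sigchld, NULL);
}
#endif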
static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {

        assert(s->type == SOURCE_CHILD);
        assert(s->child.pidfd >= 0);

        if (event_source_is_offline(s))

        if (!EVENT_SOURCE_WATCH_PIDFD(s))

        /* Note that pidfd would also generate EPOLLHUP when the process gets reaped. But at this point we
         * only permit EPOLLIN, under the assumption that upon EPOLLHUP the child source should already
         * be set to pending, and we would have returned early above. */
        assert(!s->child.exited);

        zero(s->child.siginfo);
        if (waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)

        if (s->child.siginfo.si_pid == 0)

        if (SIGINFO_CODE_IS_DEAD(s->child.siginfo.si_code))
                s->child.exited = true;

        return source_set_pending(s, true);
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {

        assert_return(events == EPOLLIN, -EIO);
        assert(min_priority);

        /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
         * sure to recheck the children we watch. This is because we only ever dequeue the first signal
         * per priority, and if we dequeue one, and SIGCHLD might be enqueued later we wouldn't know,
         * but we might have higher priority children we care about hence we need to check that
         * explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this priority we don't read another */

                struct signalfd_siginfo si;

                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));

                        if (ERRNO_IS_TRANSIENT(errno))

                if (_unlikely_(n != sizeof(si)))

                if (_unlikely_(!SIGNAL_VALID(si.ssi_signo)))

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                s->signal.siginfo = si;

                r = source_set_pending(s, true);

                if (r > 0 && *min_priority >= s->priority) {
                        *min_priority = s->priority;
                        return 1; /* an event source with smaller priority is queued. */
}
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)

        if (d->priority > threshold)

        n = read(d->fd, &d->buffer, sizeof(d->buffer));

                if (ERRNO_IS_TRANSIENT(errno))

        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->buffered_inotify_data_list, d);
}

static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {

        assert(sz <= d->buffer_filled);

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)

        while (d->buffer_filled > 0) {

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object. */

                        HASHMAP_FOREACH(inode_data, d->inodes)
                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (event_source_is_offline(s))

                                        r = source_set_pending(s, true);

                        struct inode_data *inode_data;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));

                                        event_inotify_data_drop(e, d, sz);

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;

                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));

                                        event_inotify_data_drop(e, d, sz);

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (event_source_is_offline(s))

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)

                                r = source_set_pending(s, true);

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
}

static int process_inotify(sd_event *e) {

        LIST_FOREACH(buffered, d, e->buffered_inotify_data_list) {
                r = event_inotify_data_process(e, d);
}
static int process_memory_pressure(sd_event_source *s, uint32_t revents) {

        assert(s->type == SOURCE_MEMORY_PRESSURE);

                s->memory_pressure.revents |= revents;

                s->memory_pressure.revents = revents;

        return source_set_pending(s, true);
}
static int source_memory_pressure_write(sd_event_source *s) {

        assert(s->type == SOURCE_MEMORY_PRESSURE);

        /* once we start writing, the buffer is locked, we allow no further changes. */
        s->memory_pressure.locked = true;

        if (s->memory_pressure.write_buffer_size > 0) {
                n = write(s->memory_pressure.fd, s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size);

                        if (!ERRNO_IS_TRANSIENT(errno)) {
                                /* If the kernel is built with CONFIG_PSI_DEFAULT_DISABLED it will expose PSI
                                 * files, but then generates EOPNOTSUPP on read() and write() (instead of on
                                 * open()!). This sucks hard, since we can only detect this kind of failure
                                 * so late. Let's make the best of it, and turn off the event source like we
                                 * do for failed event source handlers. */

                                log_debug_errno(errno, "Writing memory pressure settings to kernel failed, disabling memory pressure event source: %m");
                                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);

        if ((size_t) n == s->memory_pressure.write_buffer_size) {
                s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);

                s->memory_pressure.write_buffer_size = 0;

                /* Update epoll events mask, since we have now written everything and don't care for EPOLLOUT anymore */
                r = source_memory_pressure_register(s, s->enabled);

                _cleanup_free_ void *c = NULL;

                assert((size_t) n < s->memory_pressure.write_buffer_size);

                c = memdup((uint8_t*) s->memory_pressure.write_buffer + n, s->memory_pressure.write_buffer_size - n);

                free_and_replace(s->memory_pressure.write_buffer, c);
                s->memory_pressure.write_buffer_size -= n;
}
static int source_memory_pressure_initiate_dispatch(sd_event_source *s) {

        assert(s->type == SOURCE_MEMORY_PRESSURE);

        r = source_memory_pressure_write(s);

                return 1; /* if we wrote something, then don't continue with dispatching the user callback.
                           * Instead, shortcut it so that we wait for the next EPOLLOUT immediately. */

        /* No pending incoming IO? Then let's not continue further */
        if ((s->memory_pressure.revents & (EPOLLIN|EPOLLPRI)) == 0) {

                /* Treat IO errors on the notifier the same way as errors returned from a callback */
                if ((s->memory_pressure.revents & (EPOLLHUP|EPOLLERR|EPOLLRDHUP)) != 0)

                return 1; /* leave dispatch, we already processed everything */

        if (s->memory_pressure.revents & EPOLLIN) {
                uint8_t pipe_buf[PIPE_BUF];

                /* If the fd is readable, then flush out anything that might be queued */

                n = read(s->memory_pressure.fd, pipe_buf, sizeof(pipe_buf));
                if (n < 0 && !ERRNO_IS_TRANSIENT(errno))

        return 0; /* go on, dispatch to user callback */
}
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        sd_event *saved_event;

        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type, here, so that we still know it after the event callback which might
         * invalidate the event. */
        saved_type = s->type;

        /* Similarly, store a reference to the event loop object, so that we can still access it after the
         * callback might have invalidated/disconnected the event source. */
        saved_event = s->event;
        PROTECT_EVENT(saved_event);

        /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
        assert(!s->ratelimited);
        if (!ratelimit_below(&s->rate_limit)) {
                r = event_source_enter_ratelimited(s);

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);

        if (s->type != SOURCE_POST) {

                /* If we execute a non-post source, let's mark all post sources as pending. */

                SET_FOREACH(z, s->event->post_sources) {
                        if (event_source_is_offline(z))

                        r = source_set_pending(z, true);

        if (s->type == SOURCE_MEMORY_PRESSURE) {
                r = source_memory_pressure_initiate_dispatch(s);
                if (r == -EIO) /* handle EIO errors similar to callback errors */

                if (r > 0) /* already handled */

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);

        s->dispatching = true;

                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);

                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);

        case SOURCE_CHILD: {
                bool zombie = SIGINFO_CODE_IS_DEAD(s->child.siginfo.si_code);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */

                        (void) waitid(P_PIDFD, s->child.pidfd, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;

                r = s->defer.callback(s, s->userdata);

                r = s->post.callback(s, s->userdata);

                r = s->exit.callback(s, s->userdata);

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                /* If the inotify callback destroys the event source then this likely means we don't need to
                 * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
                 * free it immediately, then we couldn't drop the event from the inotify event queue without
                 * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
                 * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
                 * explicitly GC it after we are done dropping the inotify event from the buffer. */

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from
                 * the inotify event queue buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                /* Now we don't want to access 'd' anymore, it's OK to GC now. */
                event_gc_inotify_data(e, d);

        case SOURCE_MEMORY_PRESSURE:
                r = s->memory_pressure.callback(s, s->userdata);

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached();

        s->dispatching = false;

                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);

                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
}
static int event_prepare(sd_event *e) {

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))

                s->prepare_iteration = e->iteration;
                prioq_reshuffle(e->prepare, s, &s->prepare_index);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);

                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
}
static int dispatch_exit(sd_event *e) {

        p = prioq_peek(e->exit);
        assert(!p || p->type == SOURCE_EXIT);

        if (!p || event_source_is_offline(p)) {
                e->state = SD_EVENT_FINISHED;

        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
}

static sd_event_source* event_next_pending(sd_event *e) {

        p = prioq_peek(e->pending);

        if (event_source_is_offline(p))

}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};

        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          usec_add(e->watchdog_last, (e->watchdog_period / 2)),
                          usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4)));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL));
}

static int process_watchdog(sd_event *e) {

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * compromise. */

        while ((d = e->inode_data_to_close_list)) {

                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
}

static int event_memory_pressure_write_list(sd_event *e) {

                s = LIST_POP(memory_pressure.write_list, e->memory_pressure_write_list);

                assert(s->type == SOURCE_MEMORY_PRESSURE);
                assert(s->memory_pressure.write_buffer_size > 0);
                s->memory_pressure.in_write_list = false;

                r = source_memory_pressure_write(s);
}
_public_ int sd_event_prepare(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
         * this check here once, since gettid() is typically not cached, and thus want to minimize
         * syscalls. */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */

        if (e->exit_requested)

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;

        r = event_memory_pressure_write_list(e);

        r = event_arm_timer(e, &e->realtime);

        r = event_arm_timer(e, &e->boottime);

        r = event_arm_timer(e, &e->monotonic);

        r = event_arm_timer(e, &e->realtime_alarm);

        r = event_arm_timer(e, &e->boottime_alarm);

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child || e->buffered_inotify_data_list)

        e->state = SD_EVENT_ARMED;

        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);

        e->state = SD_EVENT_ARMED;
}
static int epoll_wait_usec(
                int fd,
                struct epoll_event *events,
                int maxevents,
                usec_t timeout) {

        /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not. */

#if HAVE_EPOLL_PWAIT2
        static bool epoll_pwait2_absent = false;

        /* epoll_pwait2() was added to Linux 5.11 (2021-02-14) and to glibc in 2.35 (2022-02-03). In contrast
         * to other syscalls we don't bother with our own fallback syscall wrappers on old libcs, since this
         * is not that obvious to implement given the libc and kernel definitions differ in the last
         * argument. Moreover, the only reason to use it is the more accurate timeouts (which is not a
         * biggie), let's hence rely on glibc's definitions, and fallback to epoll_pwait() when that's
         * missing. */

        if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
                r = epoll_pwait2(fd,
                                 events,
                                 maxevents,
                                 TIMESPEC_STORE(timeout),
                                 NULL);

                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                        return -errno; /* Only fallback to old epoll_wait() if the syscall is masked or not
                                        * supported. */

                epoll_pwait2_absent = true;
#endif

        if (timeout == USEC_INFINITY)

        k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);

                msec = INT_MAX; /* Saturate */

        return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
}
static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
        size_t n_event_queue, m, n_event_max;
        int64_t min_priority = threshold;
        bool something_new = false;

        assert(ret_min_priority);

        n_event_queue = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, n_event_queue))

        n_event_max = MALLOC_ELEMENTSOF(e->event_queue);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->buffered_inotify_data_list)

        r = epoll_wait_usec(
                        e->epoll_fd,
                        e->event_queue,
                        n_event_max,
                        timeout);

        if (m < n_event_max)

        if (n_event_max >= n_event_queue * 10)

        if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))

        n_event_max = MALLOC_ELEMENTSOF(e->event_queue);

        /* Set timestamp only when this is called first time. */
        if (threshold == INT64_MAX)
                triple_timestamp_now(&e->timestamp);

        for (size_t i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);

                        WakeupType *t = e->event_queue[i].data.ptr;

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                if (s->priority > threshold)

                                min_priority = MIN(min_priority, s->priority);

                                        r = process_io(e, s, e->event_queue[i].events);

                                        r = process_pidfd(e, s, e->event_queue[i].events);

                                case SOURCE_MEMORY_PRESSURE:
                                        r = process_memory_pressure(s, e->event_queue[i].events);

                                        assert_not_reached();

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);

                                assert_not_reached();

                        something_new = true;

        *ret_min_priority = min_priority;
        return something_new;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;

        for (int64_t threshold = INT64_MAX; ; threshold--) {
                int64_t epoll_min_priority, child_min_priority;

                /* There may be a possibility that new epoll (especially IO) and child events are
                 * triggered just after the process_epoll() call but before process_child(), and the new IO
                 * events may have higher priority than the child events. To salvage these events,
                 * let's call epoll_wait() again, but accept only events with higher priority than the
                 * previous. See issue https://github.com/systemd/systemd/issues/18190 and comments
                 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
                 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */

                r = process_epoll(e, timeout, threshold, &epoll_min_priority);

                        e->state = SD_EVENT_PENDING;

                if (r == 0 && threshold < INT64_MAX)
                        /* No new epoll event. */

                r = process_child(e, threshold, &child_min_priority);

                        /* No new child event. */

                threshold = MIN(epoll_min_priority, child_min_priority);
                if (threshold == INT64_MIN)

        r = process_watchdog(e);

        r = process_inotify(e);

        r = process_timer(e, e->timestamp.realtime, &e->realtime);

        r = process_timer(e, e->timestamp.boottime, &e->boottime);

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);

        /* Ratelimit expiry callback was called. Let's postpone processing pending sources and
         * put the loop in the initial state in order to evaluate (in the next iteration) also sources
         * that were potentially re-enabled by the callback.
         *
         * Wondering why we treat only this invocation of process_timer() differently? Once an event
         * source is ratelimited we essentially transform it into a CLOCK_MONOTONIC timer, hence the
         * ratelimit expiry callback is never called for any other timer type. */

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

        e->state = SD_EVENT_INITIAL;
}
_public_ int sd_event_dispatch(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;

        e->state = SD_EVENT_INITIAL;
}

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;

        FOREACH_ELEMENT(delay, e->delays) {
                l = strpcpyf(&p, l, "%u ", *delay);

        log_debug("Event loop iterations: %s", b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run_usec != 0) {

                this_run = now(CLOCK_MONOTONIC);

                l = log2u64(this_run - e->last_run_usec);
                assert(l < ELEMENTSOF(e->delays));

                if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log_usec = this_run;

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */

        r = sd_event_prepare(e);

                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run_usec = now(CLOCK_MONOTONIC);

                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
}

_public_ int sd_event_loop(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, UINT64_MAX);

        return e->exit_code;
}
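
/* Illustrative sketch (not part of this file): the three lower-level calls that one
 * sd_event_run() iteration is built from, useful when driving the loop from a foreign
 * poll loop that waits on sd_event_get_fd() itself. */
#if 0
#include <stdint.h>
#include <systemd/sd-event.h>

static int run_one_iteration(sd_event *e, uint64_t timeout_usec) {
        int r;

        r = sd_event_prepare(e);                /* run prepare callbacks, arm timerfds */
        if (r < 0)
                return r;
        if (r == 0) {                           /* nothing pending yet, so poll */
                r = sd_event_wait(e, timeout_usec);
                if (r <= 0)
                        return r;               /* error, or timeout without events */
        }

        return sd_event_dispatch(e);            /* dispatch a single event source */
}
#endif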
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);
}

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);
}

_public_ int sd_event_get_exit_code(sd_event *e, int *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!e->exit_requested)

        *ret = e->exit_code;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;
}
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(ret, -EINVAL);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */

        *ret = triple_timestamp_by_clock(&e->timestamp, clock);
}

_public_ int sd_event_default(sd_event **ret) {

                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);

        r = sd_event_new(&e);

        e->default_event_ptr = &default_event;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(ret, -EINVAL);
        assert_return(!event_origin_changed(e), -ECHILD);
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

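/* Illustrative usage sketch (not part of this file's build): a service that wants automatic WATCHDOG=1 pings
 * only needs to opt in once after creating its loop; the period comes from $WATCHDOG_USEC (i.e. WatchdogSec=).
 * Names are hypothetical.
 *
 *     sd_event *e = NULL;
 *     if (sd_event_default(&e) >= 0)
 *             (void) sd_event_set_watchdog(e, true);   // effectively a no-op if no watchdog was requested
 */
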
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_origin_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);
        assert_return(s->event, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        s->destroy_callback = callback;
        return 0;
}

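/* Illustrative usage sketch (not part of this file's build): the destroy callback is the usual hook for
 * releasing userdata when the source goes away, whichever side drops the last reference. Names are
 * hypothetical.
 *
 *     static void free_context(void *userdata) {
 *             free(userdata);
 *     }
 *
 *     // after sd_event_add_io(e, &s, fd, EPOLLIN, on_io, ctx):
 *     // (void) sd_event_source_set_destroy_callback(s, free_context);
 */
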
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}

_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}

_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}

_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        /* Turning on ratelimiting on event source types that don't support it is a loggable offense. Doing
         * so is a programming error. */
        assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);

        /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
         * non-ratelimited. */
        r = event_source_leave_ratelimit(s, /* run_callback */ false);
        if (r < 0)
                return r;

        s->rate_limit = (RateLimit) { interval, burst };
        return 0;
}

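/* Illustrative usage sketch (not part of this file's build): capping a chatty IO source to at most 100
 * dispatches per one-second interval; once the burst is exhausted the source is taken offline until the
 * interval elapses. Names are hypothetical.
 *
 *     // sd_event_source *s obtained from sd_event_add_io(...):
 *     // (void) sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 100);
 */
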
_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        s->ratelimit_expire_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
         * don't use assert_return(). Unlike turning on ratelimiting, it's not really a programming error. */
        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return -EDOM;

        if (!ratelimit_configured(&s->rate_limit))
                return -ENOEXEC;

        if (ret_interval)
                *ret_interval = s->rate_limit.interval;
        if (ret_burst)
                *ret_burst = s->rate_limit.burst;

        return 0;
}

_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return false;

        if (!ratelimit_configured(&s->rate_limit))
                return false;

        return s->ratelimited;
}

_public_ int sd_event_source_leave_ratelimit(sd_event_source *s) {
        int r;

        assert_return(s, -EINVAL);

        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return 0;

        if (!ratelimit_configured(&s->rate_limit))
                return 0;

        if (!s->ratelimited)
                return 0;

        r = event_source_leave_ratelimit(s, /* run_callback */ false);
        if (r < 0)
                return r;

        return 1; /* tell caller that we indeed just left the ratelimit state */
}

_public_ int sd_event_set_signal_exit(sd_event *e, int b) {
        bool change = false;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_origin_changed(e), -ECHILD);

        if (b) {
                /* We want to maintain pointers to these event sources, so that we can destroy them when told
                 * so. But we also don't want them to pin the event loop itself. Hence we mark them as
                 * floating after creation (and undo this before deleting them again). */

                if (!e->sigint_event_source) {
                        r = sd_event_add_signal(e, &e->sigint_event_source, SIGINT | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
                        if (r < 0)
                                return r;

                        assert_se(sd_event_source_set_floating(e->sigint_event_source, true) >= 0);
                        change = true;
                }

                if (!e->sigterm_event_source) {
                        r = sd_event_add_signal(e, &e->sigterm_event_source, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
                        if (r < 0) {
                                if (change) {
                                        assert_se(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
                                        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
                                }

                                return r;
                        }

                        assert_se(sd_event_source_set_floating(e->sigterm_event_source, true) >= 0);
                        change = true;
                }

        } else {
                if (e->sigint_event_source) {
                        assert_se(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
                        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
                        change = true;
                }

                if (e->sigterm_event_source) {
                        assert_se(sd_event_source_set_floating(e->sigterm_event_source, false) >= 0);
                        e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
                        change = true;
                }
        }

        return change;
}

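/* Illustrative usage sketch (not part of this file's build): sd_event_set_signal_exit() is the shortcut for
 * the common "exit cleanly on SIGINT/SIGTERM" pattern, replacing two manual sd_event_add_signal() calls.
 * Names are hypothetical.
 *
 *     sd_event *e = NULL;
 *     if (sd_event_default(&e) >= 0 &&
 *         sd_event_set_signal_exit(e, true) >= 0)
 *             (void) sd_event_loop(e);   // Ctrl-C or SIGTERM now ends the loop gracefully
 */
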
_public_ int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty) {
        _cleanup_free_ char *b = NULL;
        _cleanup_free_ void *w = NULL;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);
        assert_return(ty, -EINVAL);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (!STR_IN_SET(ty, "some", "full"))
                return -EINVAL;

        if (s->memory_pressure.locked) /* Refuse adjusting parameters if the caller already told us how to watch for events */
                return -EBUSY;

        char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
        if (!space)
                return -EINVAL;

        size_t l = (char*) space - (char*) s->memory_pressure.write_buffer;
        b = memdup_suffix0(s->memory_pressure.write_buffer, l);
        if (!b)
                return -ENOMEM;
        if (!STR_IN_SET(b, "some", "full"))
                return -EINVAL;

        if (streq(b, ty))
                return 0;

        size_t nl = strlen(ty) + (s->memory_pressure.write_buffer_size - l);
        w = new(char, nl);
        if (!w)
                return -ENOMEM;

        memcpy(stpcpy(w, ty), space, (s->memory_pressure.write_buffer_size - l));

        free_and_replace(s->memory_pressure.write_buffer, w);
        s->memory_pressure.write_buffer_size = nl;
        s->memory_pressure.locked = false;

        return 1;
}

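/* Illustrative usage sketch (not part of this file's build): switching a memory pressure source to watch
 * "full" stalls has to happen before the first dispatch locks the parameters. The handler name is
 * hypothetical.
 *
 *     sd_event_source *mp = NULL;
 *     if (sd_event_add_memory_pressure(e, &mp, on_pressure, NULL) >= 0)
 *             (void) sd_event_source_set_memory_pressure_type(mp, "full");
 */
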
_public_ int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) {
        _cleanup_free_ char *b = NULL;
        _cleanup_free_ void *w = NULL;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);
        assert_return(!event_origin_changed(s->event), -ECHILD);

        if (threshold_usec <= 0 || threshold_usec >= UINT64_MAX)
                return -ERANGE;
        if (window_usec <= 0 || window_usec >= UINT64_MAX)
                return -ERANGE;
        if (threshold_usec > window_usec)
                return -EINVAL;

        if (s->memory_pressure.locked) /* Refuse adjusting parameters if the caller already told us how to watch for events */
                return -EBUSY;

        char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
        if (!space)
                return -EINVAL;

        size_t l = (char*) space - (char*) s->memory_pressure.write_buffer;
        b = memdup_suffix0(s->memory_pressure.write_buffer, l);
        if (!b)
                return -ENOMEM;
        if (!STR_IN_SET(b, "some", "full"))
                return -EINVAL;

        if (asprintf((char**) &w,
                     "%s " USEC_FMT " " USEC_FMT "",
                     b, threshold_usec, window_usec) < 0)
                return -ENOMEM;

        l = strlen(w) + 1;
        if (memcmp_nn(s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size, w, l) == 0)
                return 0;

        free_and_replace(s->memory_pressure.write_buffer, w);
        s->memory_pressure.write_buffer_size = l;
        s->memory_pressure.locked = false;