/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/timerfd.h>

#include "sd-messages.h"

#include "alloc-util.h"
#include "event-source.h"
#include "glyph-util.h"
#include "hexdecoct.h"
#include "logarithm.h"
#include "mallinfo-util.h"
#include "memory-util.h"
#include "missing_magic.h"
#include "missing_syscall.h"
#include "path-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "socket-util.h"
#include "stat-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
        return s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static bool event_source_is_online(sd_event_source *s) {
        return s->enabled != SD_EVENT_OFF && !s->ratelimited;
}

static bool event_source_is_offline(sd_event_source *s) {
        return s->enabled == SD_EVENT_OFF || s->ratelimited;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_TIME_REALTIME]       = "realtime",
        [SOURCE_TIME_BOOTTIME]       = "boottime",
        [SOURCE_TIME_MONOTONIC]      = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL]              = "signal",
        [SOURCE_CHILD]               = "child",
        [SOURCE_DEFER]               = "defer",
        [SOURCE_POST]                = "post",
        [SOURCE_EXIT]                = "exit",
        [SOURCE_WATCHDOG]            = "watchdog",
        [SOURCE_INOTIFY]             = "inotify",
        [SOURCE_MEMORY_PRESSURE]     = "memory-pressure",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t)                 \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM)

#define EVENT_SOURCE_CAN_RATE_LIMIT(t)          \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM,      \
               SOURCE_MEMORY_PRESSURE)

/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
 * Time sources and ratelimited sources can be passed, so effectively this is the same as the
 * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_online_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close_list);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, buffered_inotify_data_list);

        /* A list of memory pressure event sources that still need their subscription string written */
        LIST_HEAD(sd_event_source, memory_pressure_write_list);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;

        LIST_HEAD(sd_event_source, sources);

        sd_event_source *sigint_event_source, *sigterm_event_source;

        usec_t last_run_usec, last_log_usec;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

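/* A note on the prioq comparators in this file: prioq is a binary min-heap, so each comparator
 * returns <0 to move its first argument closer to the head of the queue. "First" in the comments
 * therefore means "dispatched earlier". */
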
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static usec_t time_event_source_next(const sd_event_source *s) {
        assert(s);

        /* We have two kinds of event sources that have elapsation times associated with them: the actual
         * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
         * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are
         * looking at here. */

        if (s->ratelimited) { /* If rate-limited the next elapsation is when the ratelimit time window ends */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Otherwise this must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return s->time.next;

        return USEC_INFINITY;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        assert(s);

        if (s->ratelimited) { /* For ratelimited stuff the earliest and the latest time shall actually be the
                               * same, as we should avoid adding additional inaccuracy on an inaccuracy time
                               * window */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return usec_add(s->time.next, s->time.accuracy);

        return USEC_INFINITY;
}

static bool event_source_timer_candidate(const sd_event_source *s) {
        assert(s);

        /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending)
         * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */
        return !s->pending || s->ratelimited;
}

static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
        r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
        if (r != 0)
                return r;

        /* Order by time */
        return CMP(time_func(x), time_func(y));
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_next);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_latest);
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -EBADF,
                .watchdog_fd = -EBADF,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -EBADF,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -EBADF,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -EBADF,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -EBADF,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -EBADF,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 %s 2^63 us will be logged every 5s.",
                          special_glyph(SPECIAL_GLYPH_ELLIPSIS));
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

#define PROTECT_EVENT(e)                                                \
        _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e);

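/* PROTECT_EVENT() pins a reference on the loop for the duration of the current scope, so that a
 * callback which drops the last user reference cannot free the sd_event object while we are still
 * dispatching it. */
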
_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static void source_memory_pressure_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (event_pid_changed(s->event))
                return;

        if (!s->memory_pressure.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->memory_pressure.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->memory_pressure.registered = false;
}

static int source_memory_pressure_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = s->memory_pressure.write_buffer_size > 0 ? EPOLLOUT :
                          (s->memory_pressure.events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0)),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->memory_pressure.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->memory_pressure.fd, &ev) < 0)
                return -errno;

        s->memory_pressure.registered = true;
        return 0;
}

static void source_memory_pressure_add_to_write_list(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (s->memory_pressure.in_write_list)
                return;

        LIST_PREPEND(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
        s->memory_pressure.in_write_list = true;
}

static void source_memory_pressure_remove_from_write_list(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (!s->memory_pressure.in_write_list)
                return;

        LIST_REMOVE(memory_pressure.write_list, s->event->memory_pressure_write_list, s);
        s->memory_pressure.in_write_list = false;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {
        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {
        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -EBADF,
                        .priority = priority,
                };

                r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd >= 0 ? d->fd : -1, /* the first arg must be -1 or a valid signalfd */
                     &ss_copy,
                     SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way removes it. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        if (event_pid_changed(e))
                return;

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_online_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            event_source_is_online(e->signal_sources[sig]))
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered
         * properly again. */

        if (s->ratelimited)
                d = &s->event->monotonic;
        else if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(d = event_get_clock_data(s->event, s->type));
        else
                return; /* no-op for an event source which is neither a timer nor ratelimited. */

        prioq_reshuffle(d->earliest, s, &s->earliest_index);
        prioq_reshuffle(d->latest, s, &s->latest_index);
        d->needs_rearm = true;
}

static void event_source_time_prioq_remove(
                sd_event_source *s,
                struct clock_data *d) {

        assert(s);
        assert(d);

        prioq_remove(d->earliest, s, &s->earliest_index);
        prioq_remove(d->latest, s, &s->latest_index);
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;
        int r;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                /* Only remove this event source from the time event source here if it is not ratelimited. If
                 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
                 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */

                if (!s->ratelimited) {
                        struct clock_data *d;
                        assert_se(d = event_get_clock_data(s->event, s->type));
                        event_source_time_prioq_remove(s, d);
                }

                break;

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                        if (s->signal.unblock) {
                                sigset_t new_ss;

                                if (sigemptyset(&new_ss) < 0)
                                        log_debug_errno(errno, "Failed to reset signal set, ignoring: %m");
                                else if (sigaddset(&new_ss, s->signal.sig) < 0)
                                        log_debug_errno(errno, "Failed to add signal %i to signal mask, ignoring: %m", s->signal.sig);
                                else {
                                        r = pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);
                                        if (r != 0)
                                                log_debug_errno(r, "Failed to unblock signal %i, ignoring: %m", s->signal.sig);
                                }
                        }
                }

                break;

        case SOURCE_CHILD:
                if (event_pid_changed(s->event))
                        s->child.process_owned = false;

                if (s->child.pid > 0) {
                        if (event_source_is_online(s)) {
                                assert(s->event->n_online_child_sources > 0);
                                s->event->n_online_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        case SOURCE_MEMORY_PRESSURE:
                source_memory_pressure_remove_from_write_list(s);
                source_memory_pressure_unregister(s);
                break;

        default:
                assert_not_reached();
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        if (s->ratelimited)
                event_source_time_prioq_remove(s, &s->event->monotonic);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static sd_event_source* source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->type == SOURCE_MEMORY_PRESSURE) {
                s->memory_pressure.fd = safe_close(s->memory_pressure.fd);
                s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        return mfree(s);
}

DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}

*source_new(sd_event
*e
, bool floating
, EventSourceType type
) {
1161 /* Let's allocate exactly what we need. Note that the difference of the smallest event source
1162 * structure to the largest is 144 bytes on x86-64 at the time of writing, i.e. more than two cache
1164 static const size_t size_table
[_SOURCE_EVENT_SOURCE_TYPE_MAX
] = {
1165 [SOURCE_IO
] = endoffsetof_field(sd_event_source
, io
),
1166 [SOURCE_TIME_REALTIME
] = endoffsetof_field(sd_event_source
, time
),
1167 [SOURCE_TIME_BOOTTIME
] = endoffsetof_field(sd_event_source
, time
),
1168 [SOURCE_TIME_MONOTONIC
] = endoffsetof_field(sd_event_source
, time
),
1169 [SOURCE_TIME_REALTIME_ALARM
] = endoffsetof_field(sd_event_source
, time
),
1170 [SOURCE_TIME_BOOTTIME_ALARM
] = endoffsetof_field(sd_event_source
, time
),
1171 [SOURCE_SIGNAL
] = endoffsetof_field(sd_event_source
, signal
),
1172 [SOURCE_CHILD
] = endoffsetof_field(sd_event_source
, child
),
1173 [SOURCE_DEFER
] = endoffsetof_field(sd_event_source
, defer
),
1174 [SOURCE_POST
] = endoffsetof_field(sd_event_source
, post
),
1175 [SOURCE_EXIT
] = endoffsetof_field(sd_event_source
, exit
),
1176 [SOURCE_INOTIFY
] = endoffsetof_field(sd_event_source
, inotify
),
1177 [SOURCE_MEMORY_PRESSURE
] = endoffsetof_field(sd_event_source
, memory_pressure
),
1184 assert(type
< _SOURCE_EVENT_SOURCE_TYPE_MAX
);
1185 assert(size_table
[type
] > 0);
1187 /* We use expand_to_usable() here to tell gcc that it should consider this an object of the full
1188 * size, even if we only allocate the initial part we need. */
1189 s
= expand_to_usable(malloc0(size_table
[type
]), sizeof(sd_event_source
));
1193 /* Note: we cannot use compound initialization here, because sizeof(sd_event_source) is likely larger
1194 * than what we allocated here. */
1197 s
->floating
= floating
;
1199 s
->pending_index
= PRIOQ_IDX_NULL
;
1200 s
->prepare_index
= PRIOQ_IDX_NULL
;
1205 LIST_PREPEND(sources
, e
->sources
, s
);
static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

static void initialize_perturb(sd_event *e) {
        sd_id128_t id = {};

        /* When we sleep for longer, we try to realign the wakeup to the same time within each
         * minute/second/250ms, so that events all across the system can be coalesced into a single CPU
         * wakeup. However, let's take some system-specific randomness for this value, so that in a network
         * of systems with synced clocks timer events are distributed a bit. Here, we calculate a
         * perturbation usec offset from the boot ID (or machine ID if failed, e.g. /proc is not mounted). */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&id) >= 0 || sd_id128_get_machine(&id) >= 0)
                e->perturb = (id.qwords[0] ^ id.qwords[1]) % USEC_PER_MINUTE;
        else
                e->perturb = 0; /* This is a super early process without /proc and /etc ?? */
}

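/* Worked example: whatever the XOR of the two ID words is, the modulo reduces it to a fixed offset
 * in [0, 60s) — stable for this boot, but different from machine to machine, so fleets with synced
 * clocks don't all wake up at the very same instant. */
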
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -EBADF;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        assert(d);

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}

static int event_source_time_prioq_put(
                sd_event_source *s,
                struct clock_data *d) {

        int r;

        assert(s);
        assert(d);
        assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

        r = prioq_put(d->earliest, s, &s->earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->latest_index);
        if (r < 0) {
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
                s->earliest_index = PRIOQ_IDX_NULL;
                return r;
        }

        d->needs_rearm = true;
        return 0;
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != UINT64_MAX, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        assert_se(d = event_get_clock_data(e, type));

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = event_source_time_prioq_put(s, d);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

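/* The overflow check relies on unsigned arithmetic: t + usec wraps exactly when
 * usec > UINT64_MAX - t. Using ">=" against USEC_INFINITY (== UINT64_MAX) additionally rejects a
 * sum equal to USEC_INFINITY itself, which is reserved to mean "never". */
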
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t new_ss;
        bool block_it;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Let's make sure our special flag stays outside of the valid signal range */
        assert_cc(_NSIG < SD_EVENT_SIGNAL_PROCMASK);

        if (sig & SD_EVENT_SIGNAL_PROCMASK) {
                sig &= ~SD_EVENT_SIGNAL_PROCMASK;
                assert_return(SIGNAL_VALID(sig), -EINVAL);

                block_it = true;
        } else {
                assert_return(SIGNAL_VALID(sig), -EINVAL);

                r = signal_is_blocked(sig);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;

                block_it = false;
        }

        if (!callback)
                callback = signal_exit_callback;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        if (block_it) {
                sigset_t old_ss;

                if (sigemptyset(&new_ss) < 0)
                        return -errno;

                if (sigaddset(&new_ss, sig) < 0)
                        return -errno;

                r = pthread_sigmask(SIG_BLOCK, &new_ss, &old_ss);
                if (r != 0)
                        return -r;

                r = sigismember(&old_ss, sig);
                if (r < 0)
                        return -errno;

                s->signal.unblock = !r;
        } else
                s->signal.unblock = false;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0) {
                if (s->signal.unblock)
                        (void) pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);

                return r;
        }

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

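/* Illustrative use (the handler name is hypothetical): the caller either blocks the signal in all
 * threads before adding the source, or ORs SD_EVENT_SIGNAL_PROCMASK into the signal number to have
 * the loop block it on their behalf:
 *
 *     r = sd_event_add_signal(event, NULL, SIGUSR1 | SD_EVENT_SIGNAL_PROCMASK, on_sigusr1, NULL);
 */
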
static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -EBADF;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        /* These must be done after everything succeeds. */
        s->child.pid = pid;
        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

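/* Note for callers: SIGCHLD must already be blocked in all threads before the first child source is
 * added (checked above via signal_is_blocked()), and child sources default to SD_EVENT_ONESHOT, i.e.
 * they fire once for the state change they were armed for. */
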
_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

*s
, void *userdata
) {
1741 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1744 _public_
int sd_event_add_defer(
1746 sd_event_source
**ret
,
1747 sd_event_handler_t callback
,
1750 _cleanup_(source_freep
) sd_event_source
*s
= NULL
;
1753 assert_return(e
, -EINVAL
);
1754 assert_return(e
= event_resolve(e
), -ENOPKG
);
1755 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1756 assert_return(!event_pid_changed(e
), -ECHILD
);
1759 callback
= generic_exit_callback
;
1761 s
= source_new(e
, !ret
, SOURCE_DEFER
);
1765 s
->defer
.callback
= callback
;
1766 s
->userdata
= userdata
;
1767 s
->enabled
= SD_EVENT_ONESHOT
;
1769 r
= source_set_pending(s
, true);
1780 _public_
int sd_event_add_post(
1782 sd_event_source
**ret
,
1783 sd_event_handler_t callback
,
1786 _cleanup_(source_freep
) sd_event_source
*s
= NULL
;
1789 assert_return(e
, -EINVAL
);
1790 assert_return(e
= event_resolve(e
), -ENOPKG
);
1791 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1792 assert_return(!event_pid_changed(e
), -ECHILD
);
1795 callback
= generic_exit_callback
;
1797 s
= source_new(e
, !ret
, SOURCE_POST
);
1801 s
->post
.callback
= callback
;
1802 s
->userdata
= userdata
;
1803 s
->enabled
= SD_EVENT_ON
;
1805 r
= set_ensure_put(&e
->post_sources
, NULL
, s
);
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_trim_memory(void) {
        int r;

        /* A default implementation of a memory pressure callback. Simply releases our own allocation caches
         * and glibc's. This is automatically used when people call sd_event_add_memory_pressure() with a
         * NULL callback parameter. */

        log_debug("Memory pressure event, trimming malloc() memory.");

#if HAVE_GENERIC_MALLINFO
        generic_mallinfo before_mallinfo = generic_mallinfo_get();
#endif

        usec_t before_timestamp = now(CLOCK_MONOTONIC);
        hashmap_trim_pools();
        r = malloc_trim(0);
        usec_t after_timestamp = now(CLOCK_MONOTONIC);

        if (r > 0)
                log_debug("Successfully trimmed some memory.");
        else
                log_debug("Couldn't trim any memory.");

        usec_t period = after_timestamp - before_timestamp;

#if HAVE_GENERIC_MALLINFO
        generic_mallinfo after_mallinfo = generic_mallinfo_get();
        size_t l = LESS_BY((size_t) before_mallinfo.hblkhd, (size_t) after_mallinfo.hblkhd) +
                LESS_BY((size_t) before_mallinfo.arena, (size_t) after_mallinfo.arena);
        log_struct(LOG_DEBUG,
                   LOG_MESSAGE("Memory trimming took %s, returned %s to OS.",
                               FORMAT_TIMESPAN(period, 0),
                               FORMAT_BYTES(l)),
                   "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR,
                   "TRIMMED_BYTES=%zu", l,
                   "TRIMMED_USEC=" USEC_FMT, period);
#else
        log_struct(LOG_DEBUG,
                   LOG_MESSAGE("Memory trimming took %s.",
                               FORMAT_TIMESPAN(period, 0)),
                   "MESSAGE_ID=" SD_MESSAGE_MEMORY_TRIM_STR,
                   "TRIMMED_USEC=" USEC_FMT, period);
#endif

        return 0;
}

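/* glibc's malloc_trim(0) returns 1 if memory was actually released back to the OS and 0 otherwise,
 * which is what the r > 0 check above distinguishes between the two debug messages. */
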
static int memory_pressure_callback(sd_event_source *s, void *userdata) {
        assert(s);

        sd_event_trim_memory();
        return 0;
}

_public_ int sd_event_add_memory_pressure(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_free_ char *w = NULL;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        _cleanup_close_ int path_fd = -EBADF, fd = -EBADF;
        _cleanup_free_ void *write_buffer = NULL;
        const char *watch, *watch_fallback = NULL, *env;
        size_t write_buffer_size = 0;
        struct stat st;
        uint32_t events;
        bool locked;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = memory_pressure_callback;

        s = source_new(e, !ret, SOURCE_MEMORY_PRESSURE);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->memory_pressure.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;
        s->memory_pressure.fd = -EBADF;

        env = secure_getenv("MEMORY_PRESSURE_WATCH");
        if (env) {
                if (isempty(env) || path_equal(env, "/dev/null"))
                        return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
                                               "Memory pressure logic is explicitly disabled via $MEMORY_PRESSURE_WATCH.");

                if (!path_is_absolute(env) || !path_is_normalized(env))
                        return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
                                               "$MEMORY_PRESSURE_WATCH set to invalid path: %s", env);

                watch = env;

                env = secure_getenv("MEMORY_PRESSURE_WRITE");
                if (env) {
                        r = unbase64mem(env, SIZE_MAX, &write_buffer, &write_buffer_size);
                        if (r < 0)
                                return r;
                }

                locked = true;
        } else {

                r = is_pressure_supported();
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EOPNOTSUPP;

                /* By default we want to watch memory pressure on the local cgroup, but we'll fall back on
                 * the system wide pressure if for some reason we cannot (which could be: memory controller
                 * not delegated to us, or PSI simply not available in the kernel). On legacy cgroupv1 we'll
                 * only use the system-wide logic. */
                r = cg_all_unified();
                if (r < 0)
                        return r;
                if (r == 0)
                        watch = "/proc/pressure/memory";
                else {
                        _cleanup_free_ char *cg = NULL;

                        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cg);
                        if (r < 0)
                                return r;

                        w = path_join("/sys/fs/cgroup", cg, "memory.pressure");
                        if (!w)
                                return -ENOMEM;

                        watch = w;
                        watch_fallback = "/proc/pressure/memory";
                }

                /* Android uses three levels in its userspace low memory killer logic:
                 *     some 70000 1000000
                 *     some 100000 1000000
                 *     full 70000 1000000
                 *
                 * GNOME's low memory monitor uses:
                 *     some 70000 1000000
                 *     some 100000 1000000
                 *     full 100000 1000000
                 *
                 * We'll default to the middle level that both agree on */
                if (asprintf((char**) &write_buffer,
                             "%s " USEC_FMT " " USEC_FMT,
                             MEMORY_PRESSURE_DEFAULT_TYPE,
                             MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC,
                             MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0)
                        return -ENOMEM;

                write_buffer_size = strlen(write_buffer) + 1;
                locked = false;
        }

        path_fd = open(watch, O_PATH|O_CLOEXEC);
        if (path_fd < 0) {
                if (errno != ENOENT)
                        return -errno;

                /* We got ENOENT. Three options now: try the fallback if we have one, or return the error as
                 * is (if based on user/env config), or return -EOPNOTSUPP (because we picked the path, and
                 * the PSI service apparently is not supported) */
                if (!watch_fallback)
                        return locked ? -ENOENT : -EOPNOTSUPP;

                path_fd = open(watch_fallback, O_PATH|O_CLOEXEC);
                if (path_fd < 0) {
                        if (errno == ENOENT) /* PSI is not available in the kernel even under the fallback path? */
                                return -EOPNOTSUPP;
                        return -errno;
                }
        }

        if (fstat(path_fd, &st) < 0)
                return -errno;

        if (S_ISSOCK(st.st_mode)) {
                fd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
                if (fd < 0)
                        return -errno;

                r = connect_unix_path(fd, path_fd, NULL);
                if (r < 0 && r != -EINPROGRESS)
                        return r;

                events = EPOLLIN;

        } else if (S_ISREG(st.st_mode) || S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) {
                fd = fd_reopen(path_fd, (write_buffer_size > 0 ? O_RDWR : O_RDONLY) |O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
                if (fd < 0)
                        return fd;

                if (S_ISREG(st.st_mode)) {
                        struct statfs sfs;

                        /* If this is a regular file validate this is a procfs or cgroupfs file, where we look for EPOLLPRI */

                        if (fstatfs(fd, &sfs) < 0)
                                return -errno;

                        if (!is_fs_type(&sfs, PROC_SUPER_MAGIC) &&
                            !is_fs_type(&sfs, CGROUP2_SUPER_MAGIC))
                                return -ENOTTY;

                        events = EPOLLPRI;
                } else
                        /* For fifos and char devices just watch for EPOLLIN */
                        events = EPOLLIN;

        } else if (S_ISDIR(st.st_mode))
                return -EISDIR;
        else
                return -EBADF;

        s->memory_pressure.fd = TAKE_FD(fd);
        s->memory_pressure.write_buffer = TAKE_PTR(write_buffer);
        s->memory_pressure.write_buffer_size = write_buffer_size;
        s->memory_pressure.events = events;
        s->memory_pressure.locked = locked;

        /* So here's the thing: if we are talking to PSI we need to write the watch string before adding the
         * fd to epoll (if we ignore this, then the watch won't work). Hence we'll not actually register the
         * fd with the epoll right-away. Instead, we just add the event source to a list of memory pressure
         * event sources on which writes must be executed before the first event loop iteration is
         * executed. (We could also write the data here, right away, but we want to give the caller the
         * freedom to call sd_event_source_set_memory_pressure_type() and
         * sd_event_source_set_memory_pressure_rate() before we write it. */

        if (s->memory_pressure.write_buffer_size > 0)
                source_memory_pressure_add_to_write_list(s);

        r = source_memory_pressure_register(s, s->enabled);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

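/* Illustrative use: passing a NULL callback installs sd_event_trim_memory() as the handler (via
 * memory_pressure_callback() above), so a plain
 *
 *     r = sd_event_add_memory_pressure(event, NULL, NULL, NULL);
 *
 * is enough to get allocation caches trimmed under pressure. */
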
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (!event_pid_changed(e) &&
                    epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -EBADF;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 0;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0 && !event_pid_changed(e)) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inotify_data(
                sd_event *e,
                struct inotify_data *d) {

        assert(e);

        /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch
         * any inode with it anymore, which in turn happens if no event source of this priority is interested
         * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC
         * (under the expectation that the GC is called again once the counter is decremented). */

        if (!d)
                return;

        if (!hashmap_isempty(d->inodes))
                return;

        if (d->n_busy > 0)
                return;

        event_free_inotify_data(e, d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        event_gc_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -EBADF,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 0;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
         * events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}

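/* Example: one source watching with IN_CLOSE_WRITE|IN_EXCL_UNLINK and another with IN_MOVED_TO
 * yields IN_CLOSE_WRITE|IN_MOVED_TO here — the masks are ORed, but IN_EXCL_UNLINK is dropped
 * because not every source requested it. */
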
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}

static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int event_add_inotify_fd_internal(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                bool donate,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        _cleanup_close_ int donated_fd = donate ? fd : -EBADF;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = inotify_exit_callback;

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_gc_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                if (donated_fd >= 0)
                        inode_data->fd = TAKE_FD(donated_fd);
                else {
                        inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
                        if (inode_data->fd < 0) {
                                r = -errno;
                                event_gc_inode_data(e, inode_data);
                                return r;
                        }
                }

                LIST_PREPEND(to_close, e->inode_data_to_close_list, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_inotify_fd(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
}

_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */
        int fd, r;

        assert_return(path, -EINVAL);

        fd = open(path, O_PATH | O_CLOEXEC |
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0) |
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;

        return r;
}
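
/* Usage sketch (illustrative, not part of this file): watch a directory for
 * newly created files via the convenience call above. The handler name and
 * the watched path are assumptions made for this example.
 *
 *     static int on_create(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_info("created: %s", ev->len > 0 ? ev->name : "(unnamed)");
 *             return 0;
 *     }
 *
 *     int watch_dir(sd_event *e) {
 *             return sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE, on_create, NULL);
 *     }
 *
 * Passing NULL for the returned source makes the source "floating", i.e.
 * owned by the event loop itself. */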
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching)
                source_disconnect(s);
        else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}

_public_ sd_event* sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (event_source_is_offline(s)) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (event_source_is_online(s)) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        return 0;
}
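
/* Usage sketch (illustrative): with level-triggered epoll it is common to
 * subscribe to EPOLLOUT only while output is queued, and to EPOLLIN
 * permanently. "have_output" is a stand-in for the caller's buffer state.
 *
 *     int update_io_mask(sd_event_source *s, bool have_output) {
 *             return sd_event_source_set_io_events(s, EPOLLIN | (have_output ? EPOLLOUT : 0));
 *     }
 */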
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority,
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close_list, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        event_source_pp_prioq_reshuffle(s);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
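
/* Usage sketch (illustrative): lower the numeric priority to dispatch a
 * source ahead of SD_EVENT_PRIORITY_NORMAL sources pending in the same
 * iteration. Note the restriction above: for inotify sources this only works
 * until the first event loop iteration closes the O_PATH fd.
 *
 *     r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IMPORTANT);
 *     if (r < 0)
 *             return r;
 */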
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
        /* Quick mode: the event source doesn't exist and we only want to query boolean enablement state. */
        if (!s && !ret)
                return false;

        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (ret)
                *ret = s->enabled;

        return s->enabled != SD_EVENT_OFF;
}
static int event_source_offline(
                sd_event_source *s,
                int enabled,
                bool ratelimited) {

        bool was_offline;
        int r;

        assert(s);
        assert(enabled == SD_EVENT_OFF || ratelimited);

        /* Unset the pending flag when this event source is disabled */
        if (s->enabled != SD_EVENT_OFF &&
            enabled == SD_EVENT_OFF &&
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        was_offline = event_source_is_offline(s);
        s->enabled = enabled;
        s->ratelimited = ratelimited;

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_SIGNAL:
                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                break;

        case SOURCE_CHILD:
                if (!was_offline) {
                        assert(s->event->n_online_child_sources > 0);
                        s->event->n_online_child_sources--;
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_MEMORY_PRESSURE:
                source_memory_pressure_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached();
        }

        /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
        event_source_time_prioq_reshuffle(s);

        return 1;
}
static int event_source_online(
                sd_event_source *s,
                int enabled,
                bool ratelimited) {

        bool was_online;
        int r;

        assert(s);
        assert(enabled != SD_EVENT_OFF || !ratelimited);

        /* Unset the pending flag when this event source is enabled */
        if (s->enabled == SD_EVENT_OFF &&
            enabled != SD_EVENT_OFF &&
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        /* Are we really ready for onlining? */
        if (enabled == SD_EVENT_OFF || ratelimited) {
                /* Nope, we are not ready for onlining, then just update the precise state and exit */
                s->enabled = enabled;
                s->ratelimited = ratelimited;
                return 0;
        }

        was_online = event_source_is_online(s);

        switch (s->type) {

        case SOURCE_IO:
                r = source_io_register(s, enabled, s->io.events);
                if (r < 0)
                        return r;
                break;

        case SOURCE_SIGNAL:
                r = event_make_signal_data(s->event, s->signal.sig, NULL);
                if (r < 0) {
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        return r;
                }
                break;

        case SOURCE_CHILD:
                if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                        /* yes, we have pidfd */

                        r = source_child_pidfd_register(s, enabled);
                        if (r < 0)
                                return r;
                } else {
                        /* no pidfd, or something other to watch for than WEXITED */

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                }

                if (!was_online)
                        s->event->n_online_child_sources++;
                break;

        case SOURCE_MEMORY_PRESSURE:
                r = source_memory_pressure_register(s, enabled);
                if (r < 0)
                        return r;
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
        case SOURCE_EXIT:
        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached();
        }

        s->enabled = enabled;
        s->ratelimited = ratelimited;

        /* Non-failing operations below */
        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
        event_source_time_prioq_reshuffle(s);

        return 1;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);

        /* Quick mode: if the source doesn't exist, SD_EVENT_OFF is a noop. */
        if (m == SD_EVENT_OFF && !s)
                return 0;

        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m) /* No change? */
                return 0;

        if (m == SD_EVENT_OFF)
                r = event_source_offline(s, m, s->ratelimited);
        else {
                if (s->enabled != SD_EVENT_OFF) {
                        /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
                         * event source is already enabled after all. */
                        s->enabled = m;
                        return 0;
                }

                r = event_source_online(s, m, s->ratelimited);
        }
        if (r < 0)
                return r;

        event_source_pp_prioq_reshuffle(s);
        return 0;
}
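
/* Usage sketch (illustrative): a handler that disables its own source after
 * the first dispatch, equivalent to arming the source with SD_EVENT_ONESHOT.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
 *             return 0;
 *     }
 */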
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}
_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
        usec_t t;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);

        if (usec == USEC_INFINITY)
                return sd_event_source_set_time(s, USEC_INFINITY);

        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
        if (r < 0)
                return r;

        usec = usec_add(t, usec);
        if (usec == USEC_INFINITY)
                return -EOVERFLOW;

        return sd_event_source_set_time(s, usec);
}
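
/* Usage sketch (illustrative): a periodic tick built from a oneshot time
 * source, re-armed from its own callback. Time sources are enabled as
 * SD_EVENT_ONESHOT by default, hence the explicit re-enable.
 *
 *     static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
 *             (void) sd_event_source_set_time_relative(s, USEC_PER_SEC);
 *             (void) sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *             return 0;
 *     }
 */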
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != UINT64_MAX, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd;
}
_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);

        /* If we already have seen indication the process exited refuse sending a signal early. This way we
         * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
         * available. */
        if (s->child.exited)
                return -ESRCH;

        if (s->child.pidfd >= 0) {
                siginfo_t copy;

                /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
                 * structure here */
                if (si)
                        copy = *si;

                if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
                        /* Let's propagate the error only if the system call is not implemented or prohibited */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        return 0;
        }

        /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
         * this here. */
        if (flags != 0)
                return -EOPNOTSUPP;

        if (si) {
                /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
                siginfo_t copy = *si;

                if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
                        return -errno;
        } else if (kill(s->child.pid, sig) < 0)
                return -errno;

        return 0;
}
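
/* Usage sketch (illustrative): terminate a watched child. Thanks to the
 * early -ESRCH check above, this cannot signal a recycled PID once the child
 * is known to have exited.
 *
 *     r = sd_event_source_send_child_signal(s, SIGTERM, NULL, 0);
 *     if (r < 0 && r != -ESRCH)
 *             return r;
 */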
_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd_owned;
}

_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        s->child.pidfd_owned = own;
        return 0;
}

_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        return s->child.process_owned;
}

_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        s->child.process_owned = own;
        return 0;
}
_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
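
/* Usage sketch (illustrative): a prepare callback runs just before the loop
 * polls, which is useful for flushing library-internal queues; the
 * flush_output() helper is hypothetical and stands in for such work.
 *
 *     static int on_prepare(sd_event_source *s, void *userdata) {
 *             return flush_output(userdata);
 *     }
 *
 *     r = sd_event_source_set_prepare(s, on_prepare);
 */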
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static int event_source_enter_ratelimited(sd_event_source *s) {
        int r;

        assert(s);

        /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
         * the end of the rate limit time window, much as if it was a timer event source. */

        if (s->ratelimited)
                return 0; /* Already ratelimited, this is a NOP hence */

        /* Make sure we can install a CLOCK_MONOTONIC event further down. */
        r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
        if (r < 0)
                return r;

        /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
         * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
         * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));

        /* Now, let's add the event source to the monotonic clock instead */
        r = event_source_time_prioq_put(s, &s->event->monotonic);
        if (r < 0)
                goto fail;

        /* And let's take the event source officially offline */
        r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
        if (r < 0) {
                event_source_time_prioq_remove(s, &s->event->monotonic);
                goto fail;
        }

        event_source_pp_prioq_reshuffle(s);

        log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
        return 0;

fail:
        /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
         * space for it should already be allocated. */
        if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);

        return r;
}
static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {
        int r;

        assert(s);

        if (!s->ratelimited)
                return 0;

        /* Let's take the event source out of the monotonic prioq first. */
        event_source_time_prioq_remove(s, &s->event->monotonic);

        /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
        if (EVENT_SOURCE_IS_TIME(s->type)) {
                r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
                if (r < 0)
                        goto fail;
        }

        /* Let's try to take it online again. */
        r = event_source_online(s, s->enabled, /* ratelimited= */ false);
        if (r < 0) {
                /* Do something roughly sensible when this failed: undo the two prioq ops above */
                if (EVENT_SOURCE_IS_TIME(s->type))
                        event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));

                goto fail;
        }

        event_source_pp_prioq_reshuffle(s);
        ratelimit_reset(&s->rate_limit);

        log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));

        if (run_callback && s->ratelimit_expire_callback) {
                s->dispatching = true;
                r = s->ratelimit_expire_callback(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(s->event, r);
                        else
                                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
                }

                return 1;
        }

        return 0;

fail:
        /* Do something somewhat reasonable when we cannot move an event sources out of ratelimited mode:
         * simply put it back in it, maybe we can then process it more successfully next iteration. */
        assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);

        return r;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
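
/* Worked example with illustrative numbers: assume e->perturb is 7s into the
 * minute, and we may wake up anywhere in [a, b] = [12:00:05, 12:00:40]. The
 * first candidate is (b / 1min) * 1min + perturb = 12:00:00 + 7s = 12:00:07,
 * which is below b and at or after a, so 12:00:07 is returned: every loop on
 * this machine using minute-range timers wakes at second 7 of the minute,
 * letting the CPU sleep in between. Only if the candidate missed the window
 * would the 10s, 1s and 250ms granularities be tried in turn. */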
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
        if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
        assert(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (ERRNO_IS_TRANSIENT(errno))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        bool callback_invoked = false;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

                if (!s || time_event_source_next(s) > n)
                        break;

                if (s->ratelimited) {
                        /* This is an event sources whose ratelimit window has ended. Let's turn it on
                         * again. */
                        assert(s->ratelimited);

                        r = event_source_leave_ratelimit(s, /* run_callback */ true);
                        if (r < 0)
                                return r;
                        else if (r == 1)
                                callback_invoked = true;

                        continue;
                }

                if (s->enabled == SD_EVENT_OFF || s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                event_source_time_prioq_reshuffle(s);
        }

        return callback_invoked;
}
static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
        int64_t min_priority = threshold;
        bool something_new = false;
        sd_event_source *s;
        int r;

        assert(e);
        assert(ret_min_priority);

        if (!e->need_process_child) {
                *ret_min_priority = min_priority;
                return 0;
        }

        e->need_process_child = false;

        /* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait
         * for, instead of using P_ALL. This is because we only want to get child information of very
         * specific child processes, and not all of them. We might not have processed the SIGCHLD event
         * of a previous invocation and we don't want to maintain a unbounded *per-child* event queue,
         * hence we really don't want anything flushed out of the kernel's queue that we don't care
         * about. Since this is O(n) this means that if you have a lot of processes you probably want
         * to handle SIGCHLD yourself.
         *
         * We do not reap the children here (by using WNOWAIT), this is only done after the event
         * source is dispatched so that the callback still sees the process as a zombie. */

        HASHMAP_FOREACH(s, e->child_sources) {
                assert(s->type == SOURCE_CHILD);

                if (s->priority > threshold)
                        continue;

                if (s->pending)
                        continue;

                if (event_source_is_offline(s))
                        continue;

                if (s->child.exited)
                        continue;

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        /* There's a usable pidfd known for this event source? Then don't waitid() for
                         * it here */
                        continue;

                zero(s->child.siginfo);
                if (waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
                        return negative_errno();

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (zombie)
                                s->child.exited = true;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's immediately remove the state
                                 * change from the queue, since there's no benefit in leaving it
                                 * queued. */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                        if (r > 0) {
                                something_new = true;
                                min_priority = MIN(min_priority, s->priority);
                        }
                }
        }

        *ret_min_priority = min_priority;
        return something_new;
}
static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (s->pending)
                return 0;

        if (event_source_is_offline(s))
                return 0;

        if (!EVENT_SOURCE_WATCH_PIDFD(s))
                return 0;

        zero(s->child.siginfo);
        if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
                return -errno;

        if (s->child.siginfo.si_pid == 0)
                return 0;

        if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
                s->child.exited = true;

        return source_set_pending(s, true);
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);
        assert(min_priority);

        /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
         * sure to recheck the children we watch. This is because we only ever dequeue the first signal
         * per priority, and if we dequeue one, and SIGCHLD might be enqueued later we wouldn't know,
         * but we might have higher priority children we care about hence we need to check that
         * explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (ERRNO_IS_TRANSIENT(errno))
                                return 0;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
                if (r > 0 && *min_priority >= s->priority) {
                        *min_priority = s->priority;
                        return 1; /* an event source with smaller priority is queued. */
                }

                return 0;
        }
}
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
        ssize_t n;

        assert(e);
        assert(d);
        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        if (d->priority > threshold)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (ERRNO_IS_TRANSIENT(errno))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->buffered_inotify_data_list, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest to the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes)
                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (event_source_is_offline(s))
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                } else {
                        struct inode_data *inode_data;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (event_source_is_offline(s))
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}
static int process_inotify(sd_event *e) {
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->buffered_inotify_data_list) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}
static int process_memory_pressure(sd_event_source *s, uint32_t revents) {
        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        if (s->pending)
                s->memory_pressure.revents |= revents;
        else
                s->memory_pressure.revents = revents;

        return source_set_pending(s, true);
}
static int source_memory_pressure_write(sd_event_source *s) {
        ssize_t n;
        int r;

        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        /* once we start writing, the buffer is locked, we allow no further changes. */
        s->memory_pressure.locked = true;

        if (s->memory_pressure.write_buffer_size > 0) {
                n = write(s->memory_pressure.fd, s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size);
                if (n < 0) {
                        if (!ERRNO_IS_TRANSIENT(errno))
                                return -errno;

                        n = 0;
                }
        } else
                n = 0;

        assert(n >= 0);

        if ((size_t) n == s->memory_pressure.write_buffer_size) {
                s->memory_pressure.write_buffer = mfree(s->memory_pressure.write_buffer);

                if (n > 0) {
                        s->memory_pressure.write_buffer_size = 0;

                        /* Update epoll events mask, since we have now written everything and don't care for EPOLLOUT anymore */
                        r = source_memory_pressure_register(s, s->enabled);
                        if (r < 0)
                                return r;
                }
        } else if (n > 0) {
                _cleanup_free_ void *c = NULL;

                assert((size_t) n < s->memory_pressure.write_buffer_size);

                c = memdup((uint8_t*) s->memory_pressure.write_buffer + n, s->memory_pressure.write_buffer_size - n);
                if (!c)
                        return -ENOMEM;

                free_and_replace(s->memory_pressure.write_buffer, c);
                s->memory_pressure.write_buffer_size -= n;
                return 1;
        }

        return 0;
}
static int source_memory_pressure_initiate_dispatch(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_MEMORY_PRESSURE);

        r = source_memory_pressure_write(s);
        if (r < 0)
                return r;
        if (r > 0)
                return 1; /* if we wrote something, then don't continue with dispatching user dispatch
                           * function. Instead, shortcut it so that we wait for next EPOLLOUT immediately. */

        /* No pending incoming IO? Then let's not continue further */
        if ((s->memory_pressure.revents & (EPOLLIN|EPOLLPRI)) == 0) {

                /* Treat IO errors on the notifier the same ways errors returned from a callback */
                if ((s->memory_pressure.revents & (EPOLLHUP|EPOLLERR|EPOLLRDHUP)) != 0)
                        return -EIO;

                return 1; /* leave dispatch, we already processed everything */
        }

        if (s->memory_pressure.revents & EPOLLIN) {
                uint8_t pipe_buf[PIPE_BUF];
                ssize_t n;

                /* If the fd is readable, then flush out anything that might be queued */

                n = read(s->memory_pressure.fd, pipe_buf, sizeof(pipe_buf));
                if (n < 0 && !ERRNO_IS_TRANSIENT(errno))
                        return -errno;
        }

        return 0; /* go on, dispatch to user callback */
}
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        sd_event *saved_event;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type, here, so that we still know it after the event callback which might
         * invalidate the event. */
        saved_type = s->type;

        /* Similarly, store a reference to the event loop object, so that we can still access it after the
         * callback might have invalidated/disconnected the event source. */
        saved_event = s->event;
        PROTECT_EVENT(saved_event);

        /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
        assert(!s->ratelimited);
        if (!ratelimit_below(&s->rate_limit)) {
                r = event_source_enter_ratelimited(s);
                if (r < 0)
                        return r;

                return 1;
        }

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;

                /* If we execute a non-post source, let's mark all post sources as pending. */

                SET_FOREACH(z, s->event->post_sources) {
                        if (event_source_is_offline(z))
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->type == SOURCE_MEMORY_PRESSURE) {
                r = source_memory_pressure_initiate_dispatch(s);
                if (r == -EIO) /* handle EIO errors similar to callback errors */
                        goto finish;
                if (r < 0)
                        return r;
                if (r > 0) /* already handled */
                        return 1;
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                /* If the inotify callback destroys the event source then this likely means we don't need to
                 * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
                 * free it immediately, then we couldn't drop the event from the inotify event queue without
                 * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
                 * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
                 * explicitly GC it after we are done dropping the inotify event from the buffer. */
                d->n_busy++;
                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
                d->n_busy--;

                /* When no event is pending anymore on this inotify object, then let's drop the event from
                 * the inotify event queue buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                /* Now we don't want to access 'd' anymore, it's OK to GC now. */
                event_gc_inotify_data(e, d);
                break;
        }

        case SOURCE_MEMORY_PRESSURE:
                r = s->memory_pressure.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached();
        }

        s->dispatching = false;

finish:
        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);
                else
                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
        }

        return 1;
}
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
                        break;

                s->prepare_iteration = e->iteration;
                prioq_reshuffle(e->prepare, s, &s->prepare_index);

                assert(s->prepare);
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);
                        else
                                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
                }
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        assert(!p || p->type == SOURCE_EXIT);

        if (!p || event_source_is_offline(p)) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        PROTECT_EVENT(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (event_source_is_offline(p))
                return NULL;

        return p;
}
*e
) {
4329 struct itimerspec its
= {};
4333 assert(e
->watchdog_fd
>= 0);
4335 t
= sleep_between(e
,
4336 usec_add(e
->watchdog_last
, (e
->watchdog_period
/ 2)),
4337 usec_add(e
->watchdog_last
, (e
->watchdog_period
* 3 / 4)));
4339 timespec_store(&its
.it_value
, t
);
4341 /* Make sure we never set the watchdog to 0, which tells the
4342 * kernel to disable it. */
4343 if (its
.it_value
.tv_sec
== 0 && its
.it_value
.tv_nsec
== 0)
4344 its
.it_value
.tv_nsec
= 1;
4346 return RET_NERRNO(timerfd_settime(e
->watchdog_fd
, TFD_TIMER_ABSTIME
, &its
, NULL
));
4349 static int process_watchdog(sd_event
*e
) {
4355 /* Don't notify watchdog too often */
4356 if (e
->watchdog_last
+ e
->watchdog_period
/ 4 > e
->timestamp
.monotonic
)
4359 sd_notify(false, "WATCHDOG=1");
4360 e
->watchdog_last
= e
->timestamp
.monotonic
;
4362 return arm_watchdog(e
);
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * compromise. */

        while ((d = e->inode_data_to_close_list)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
        }
}

static int event_memory_pressure_write_list(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = LIST_POP(memory_pressure.write_list, e->memory_pressure_write_list);
                if (!s)
                        break;

                assert(s->type == SOURCE_MEMORY_PRESSURE);
                assert(s->memory_pressure.write_buffer_size > 0);
                s->memory_pressure.in_write_list = false;

                r = source_memory_pressure_write(s);
                if (r < 0)
                        return r;
        }

        return 0;
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
         * this check here once, since gettid() is typically not cached, and thus want to minimize
         * syscalls */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
        PROTECT_EVENT(e);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_memory_pressure_write_list(e);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child || e->buffered_inotify_data_list)
                goto pending;

        e->state = SD_EVENT_ARMED;
        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
static int epoll_wait_usec(
                int fd,
                struct epoll_event *events,
                int maxevents,
                usec_t timeout) {

        int msec;

        /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not. */

#if HAVE_EPOLL_PWAIT2
        static bool epoll_pwait2_absent = false;
        int r;

        /* epoll_pwait2() was added to Linux 5.11 (2021-02-14) and to glibc in 2.35 (2022-02-03). In contrast
         * to other syscalls we don't bother with our own fallback syscall wrappers on old libcs, since this
         * is not that obvious to implement given the libc and kernel definitions differ in the last
         * argument. Moreover, the only reason to use it is the more accurate time-outs (which is not a
         * biggie), let's hence rely on glibc's definitions, and fallback to epoll_pwait() when that's
         * missing. */

        if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
                r = epoll_pwait2(fd,
                                 events,
                                 maxevents,
                                 TIMESPEC_STORE(timeout),
                                 NULL);
                if (r >= 0)
                        return r;
                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                        return -errno; /* Only fallback to old epoll_wait() if the syscall is masked or not
                                        * supported. */

                epoll_pwait2_absent = true;
        }
#endif

        if (timeout == USEC_INFINITY)
                msec = -1;
        else {
                usec_t k;

                k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
                if (k >= INT_MAX)
                        msec = INT_MAX; /* Saturate */
                else
                        msec = (int) k;
        }

        return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
}
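
/* Note on the fallback path above (illustrative arithmetic): epoll_wait()
 * takes milliseconds, so the microsecond timeout is rounded *up* with
 * DIV_ROUND_UP. For example a 1500us timeout becomes 2ms rather than 1ms, so
 * we never wake up before the deadline, and values beyond INT_MAX ms saturate
 * instead of overflowing. */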
static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
        size_t n_event_queue, m, n_event_max;
        int64_t min_priority = threshold;
        bool something_new = false;
        int r;

        assert(e);
        assert(ret_min_priority);

        n_event_queue = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
                return -ENOMEM;

        n_event_max = MALLOC_ELEMENTSOF(e->event_queue);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->buffered_inotify_data_list)
                timeout = 0;

        for (;;) {
                r = epoll_wait_usec(
                                e->epoll_fd,
                                e->event_queue,
                                n_event_max,
                                timeout);
                if (r < 0)
                        return r;

                m = (size_t) r;

                if (m < n_event_max)
                        break;

                if (n_event_max >= n_event_queue * 10)
                        break;

                if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
                        return -ENOMEM;

                n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
                timeout = 0;
        }

        /* Set timestamp only when this is called first time. */
        if (threshold == INT64_MAX)
                triple_timestamp_get(&e->timestamp);

        for (size_t i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
                else {
                        WakeupType *t = e->event_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                assert(s);

                                if (s->priority > threshold)
                                        continue;

                                min_priority = MIN(min_priority, s->priority);

                                switch (s->type) {

                                case SOURCE_IO:
                                        r = process_io(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_CHILD:
                                        r = process_pidfd(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_MEMORY_PRESSURE:
                                        r = process_memory_pressure(s, e->event_queue[i].events);
                                        break;

                                default:
                                        assert_not_reached();
                                }

                                break;
                        }

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                assert(d);

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
                                break;

                        default:
                                assert_not_reached();
                        }
                }
                if (r < 0)
                        return r;
                if (r > 0)
                        something_new = true;
        }

        *ret_min_priority = min_priority;
        return something_new;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        for (int64_t threshold = INT64_MAX; ; threshold--) {
                int64_t epoll_min_priority, child_min_priority;

                /* There may be a possibility that new epoll (especially IO) and child events are
                 * triggered just after process_epoll() call but before process_child(), and the new IO
                 * events may have higher priority than the child events. To salvage these events,
                 * let's call epoll_wait() again, but accepts only events with higher priority than the
                 * previous. See issue https://github.com/systemd/systemd/issues/18190 and comments
                 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
                 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */

                r = process_epoll(e, timeout, threshold, &epoll_min_priority);
                if (r == -EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }
                if (r < 0)
                        goto finish;
                if (r == 0 && threshold < INT64_MAX)
                        /* No new epoll event. */
                        break;

                r = process_child(e, threshold, &child_min_priority);
                if (r < 0)
                        goto finish;
                if (r == 0)
                        /* No new child event. */
                        break;

                threshold = MIN(epoll_min_priority, child_min_priority);
                if (threshold == INT64_MIN)
                        break;

                timeout = 0;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;
        if (r > 0) {
                /* Ratelimit expiry callback was called. Let's postpone processing pending sources and
                 * put loop in the initial state in order to evaluate (in the next iteration) also sources
                 * there were potentially re-enabled by the callback.
                 *
                 * Wondering why we treat only this invocation of process_timer() differently? Once event
                 * source is ratelimited we essentially transform it into CLOCK_MONOTONIC timer hence
                 * ratelimit expiry callback is never called for any other timer type. */
                r = 0;
                goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                PROTECT_EVENT(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run_usec != 0) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = log2u64(this_run - e->last_run_usec);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log_usec = this_run;
                }
        }

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
        PROTECT_EVENT(e);

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run_usec = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        PROTECT_EVENT(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, UINT64_MAX);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
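
/* Usage sketch (illustrative): the canonical pattern for driving a service
 * off the default loop. sd_event_set_signal_exit() hooks SIGINT/SIGTERM so
 * that sd_event_loop() returns when the service is asked to stop.
 *
 *     int run_service(void) {
 *             _cleanup_(sd_event_unrefp) sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_default(&e);
 *             if (r < 0)
 *                     return r;
 *
 *             r = sd_event_set_signal_exit(e, true);
 *             if (r < 0)
 *                     return r;
 *
 *             return sd_event_loop(e);
 *     }
 */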
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
5015 _public_
int sd_event_get_iteration(sd_event
*e
, uint64_t *ret
) {
5016 assert_return(e
, -EINVAL
);
5017 assert_return(e
= event_resolve(e
), -ENOPKG
);
5018 assert_return(!event_pid_changed(e
), -ECHILD
);
5020 *ret
= e
->iteration
;
5024 _public_
int sd_event_source_set_destroy_callback(sd_event_source
*s
, sd_event_destroy_t callback
) {
5025 assert_return(s
, -EINVAL
);
5027 s
->destroy_callback
= callback
;
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}

_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}

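/* Usage sketch (illustrative): a floating source is owned by the loop rather than by the
 * caller, inverting the direction of the reference. This mirrors the pattern used by
 * sd_event_set_signal_exit() below: keep a plain pointer for later removal, but let the
 * loop hold the reference so the source does not pin the loop's caller-side lifetime.
 * on_idle is a hypothetical defer handler.
 *
 *     sd_event_source *s = NULL;
 *     if (sd_event_add_defer(e, &s, on_idle, NULL) >= 0)
 *             assert(sd_event_source_set_floating(s, true) >= 0);
 */
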
_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}

_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
        int r;

        assert_return(s, -EINVAL);

        /* Turning on ratelimiting on event source types that don't support it is a loggable offense. Doing
         * so is a programming error. */
        assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);

        /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
         * non-ratelimited. */
        r = event_source_leave_ratelimit(s, /* run_callback */ false);
        if (r < 0)
                return r;

        s->rate_limit = (RateLimit) { interval, burst };
        return 0;
}

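/* Usage sketch (illustrative): cap a chatty source at 100 dispatches per 1s interval. Once
 * the limit is hit the source is taken offline and brought back when the interval elapses,
 * at which point the optional expire callback runs. on_ratelimit_expired is hypothetical.
 *
 *     assert(sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 100) >= 0);
 *     assert(sd_event_source_set_ratelimit_expire_callback(s, on_ratelimit_expired) >= 0);
 */
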
_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
        assert_return(s, -EINVAL);

        s->ratelimit_expire_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
        assert_return(s, -EINVAL);

        /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
         * don't use assert_return(). Unlike turning on ratelimiting it's not really a programming error. */
        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return -EDOM;

        if (!ratelimit_configured(&s->rate_limit))
                return -ENOEXEC;

        if (ret_interval)
                *ret_interval = s->rate_limit.interval;
        if (ret_burst)
                *ret_burst = s->rate_limit.burst;

        return 0;
}

_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
        assert_return(s, -EINVAL);

        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return false;

        if (!ratelimit_configured(&s->rate_limit))
                return false;

        return s->ratelimited;
}

_public_ int sd_event_set_signal_exit(sd_event *e, int b) {
        bool change = false;
        int r;

        assert_return(e, -EINVAL);

        if (b) {
                /* We want to maintain pointers to these event sources, so that we can destroy them when told
                 * so. But we also don't want them to pin the event loop itself. Hence we mark them as
                 * floating after creation (and undo this before deleting them again). */

                if (!e->sigint_event_source) {
                        r = sd_event_add_signal(e, &e->sigint_event_source, SIGINT | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
                        if (r < 0)
                                return r;

                        assert(sd_event_source_set_floating(e->sigint_event_source, true) >= 0);
                        change = true;
                }

                if (!e->sigterm_event_source) {
                        r = sd_event_add_signal(e, &e->sigterm_event_source, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
                        if (r < 0) {
                                if (change) {
                                        assert(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
                                        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
                                }

                                return r;
                        }

                        assert(sd_event_source_set_floating(e->sigterm_event_source, true) >= 0);
                        change = true;
                }

        } else {
                if (e->sigint_event_source) {
                        assert(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
                        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
                        change = true;
                }

                if (e->sigterm_event_source) {
                        assert(sd_event_source_set_floating(e->sigterm_event_source, false) >= 0);
                        e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
                        change = true;
                }
        }

        return change;
}

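/* Usage sketch (illustrative): one call to get the common "exit cleanly on SIGINT/SIGTERM"
 * behaviour, instead of adding the two signal sources by hand. Since the sources are added
 * with SD_EVENT_SIGNAL_PROCMASK, the signals are also blocked for the calling thread.
 *
 *     assert(sd_event_set_signal_exit(e, true) >= 0);
 */
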
_public_ int sd_event_source_set_memory_pressure_type(sd_event_source *s, const char *ty) {
        _cleanup_free_ char *b = NULL;
        _cleanup_free_ void *w = NULL;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);
        assert_return(ty, -EINVAL);

        if (!STR_IN_SET(ty, "some", "full"))
                return -EINVAL;

        if (s->memory_pressure.locked) /* Refuse adjusting parameters if the caller already told us how to watch for events */
                return -EBUSY;

        char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
        if (!space)
                return -EINVAL;

        size_t l = (char*) space - (char*) s->memory_pressure.write_buffer;
        b = memdup_suffix0(s->memory_pressure.write_buffer, l);
        if (!b)
                return -ENOMEM;
        if (!STR_IN_SET(b, "some", "full"))
                return -EINVAL;

        if (streq(b, ty))
                return 0;

        size_t nl = strlen(ty) + (s->memory_pressure.write_buffer_size - l);
        w = new(char, nl);
        if (!w)
                return -ENOMEM;

        memcpy(stpcpy(w, ty), space, (s->memory_pressure.write_buffer_size - l));

        free_and_replace(s->memory_pressure.write_buffer, w);
        s->memory_pressure.write_buffer_size = nl;
        s->memory_pressure.locked = false;

        return 1;
}

_public_ int sd_event_source_set_memory_pressure_period(sd_event_source *s, uint64_t threshold_usec, uint64_t window_usec) {
        _cleanup_free_ char *b = NULL;
        _cleanup_free_ void *w = NULL;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_MEMORY_PRESSURE, -EDOM);

        if (threshold_usec <= 0 || threshold_usec >= UINT64_MAX)
                return -ERANGE;
        if (window_usec <= 0 || window_usec >= UINT64_MAX)
                return -ERANGE;
        if (threshold_usec > window_usec)
                return -EINVAL;

        if (s->memory_pressure.locked) /* Refuse adjusting parameters if the caller already told us how to watch for events */
                return -EBUSY;

        char* space = memchr(s->memory_pressure.write_buffer, ' ', s->memory_pressure.write_buffer_size);
        if (!space)
                return -EINVAL;

        size_t l = (char*) space - (char*) s->memory_pressure.write_buffer;
        b = memdup_suffix0(s->memory_pressure.write_buffer, l);
        if (!b)
                return -ENOMEM;
        if (!STR_IN_SET(b, "some", "full"))
                return -EINVAL;

        if (asprintf((char**) &w,
                     "%s " USEC_FMT " " USEC_FMT,
                     b, threshold_usec, window_usec) < 0)
                return -ENOMEM;

        l = strlen(w) + 1;
        if (memcmp_nn(s->memory_pressure.write_buffer, s->memory_pressure.write_buffer_size, w, l) == 0)
                return 0;

        free_and_replace(s->memory_pressure.write_buffer, w);
        s->memory_pressure.write_buffer_size = l;
        s->memory_pressure.locked = false;

        return 1;
}

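/* Usage sketch (illustrative): the PSI trigger string written to the proc/cgroup pressure
 * file has the form "<some|full> <threshold_usec> <window_usec>", which is exactly what the
 * two setters above edit in place. A typical configuration, assuming a source created with
 * sd_event_add_memory_pressure():
 *
 *     assert(sd_event_source_set_memory_pressure_type(s, "full") >= 0);
 *     assert(sd_event_source_set_memory_pressure_period(s, 100 * USEC_PER_MSEC, 1 * USEC_PER_SEC) >= 0);
 *     // resulting trigger string: "full 100000 1000000"
 */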