/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "glyph-util.h"
#include "hashmap.h"
#include "list.h"
#include "logarithm.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static bool event_source_is_online(sd_event_source *s) {
        assert(s);
        return s->enabled != SD_EVENT_OFF && !s->ratelimited;
}

static bool event_source_is_offline(sd_event_source *s) {
        assert(s);
        return s->enabled == SD_EVENT_OFF || s->ratelimited;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t)                 \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM)

#define EVENT_SOURCE_CAN_RATE_LIMIT(t)          \
        IN_SET((t),                             \
               SOURCE_IO,                       \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM,      \
               SOURCE_SIGNAL,                   \
               SOURCE_DEFER,                    \
               SOURCE_INOTIFY)

/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
 * Time sources and ratelimited sources can be passed, so effectively this is the same as the
 * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_online_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close_list);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, buffered_inotify_data_list);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;

        LIST_HEAD(sd_event_source, sources);

        sd_event_source *sigint_event_source, *sigterm_event_source;

        usec_t last_run_usec, last_log_usec;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static usec_t time_event_source_next(const sd_event_source *s) {
        assert(s);

        /* We have two kinds of event sources that have elapsation times associated with them: the actual
         * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
         * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are
         * looking at here. */

        if (s->ratelimited) { /* If rate-limited the next elapsation is when the ratelimit time window ends */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Otherwise this must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return s->time.next;

        return USEC_INFINITY;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        assert(s);

        if (s->ratelimited) { /* For ratelimited stuff the earliest and the latest time shall actually be the
                               * same, as we should avoid adding additional inaccuracy on an inaccuracy time
                               * window */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return usec_add(s->time.next, s->time.accuracy);

        return USEC_INFINITY;
}

static bool event_source_timer_candidate(const sd_event_source *s) {
        assert(s);

        /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending)
         * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */
        return !s->pending || s->ratelimited;
}

static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
        r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
        if (r != 0)
                return r;

        /* Order by time */
        return CMP(time_func(x), time_func(y));
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_next);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_latest);
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
        e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -EBADF,
                .watchdog_fd = -EBADF,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -EBADF,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -EBADF,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -EBADF,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -EBADF,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -EBADF,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 %s 2^63 us will be logged every 5s.",
                          special_glyph(SPECIAL_GLYPH_ELLIPSIS));
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
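
/* Illustrative sketch (not part of this file): a typical caller allocates a loop, attaches
 * sources, and runs it until sd_event_exit() is called. The handler name and fd are
 * hypothetical; the sd-event calls are the public API declared in sd-event.h:
 *
 *     _cleanup_(sd_event_unrefp) sd_event *e = NULL;
 *     assert_se(sd_event_new(&e) >= 0);
 *     assert_se(sd_event_add_io(e, NULL, some_fd, EPOLLIN, my_io_handler, NULL) >= 0);
 *     assert_se(sd_event_loop(e) >= 0);
 */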

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
#define PROTECT_EVENT(e)                                                \
        _unused_ _cleanup_(sd_event_unrefp) sd_event *_ref = sd_event_ref(e);
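
/* PROTECT_EVENT() takes a reference for the current scope and drops it automatically when the
 * scope is left, so the event loop cannot be freed behind our back while we are working on it.
 * A minimal usage sketch (the function name is hypothetical):
 *
 *     static int some_dispatch_step(sd_event *e) {
 *             PROTECT_EVENT(e);   // e stays pinned until this function returns
 *             ...
 *             return 0;
 *     }
 */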

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -EBADF,
                        .priority = priority,
                };

                r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd >= 0 ? d->fd : -1, /* the first arg must be -1 or a valid signalfd */
                     &ss_copy,
                     SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty
         * that way, removes the object altogether. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        if (event_pid_changed(e))
                return;

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_online_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            event_source_is_online(e->signal_sources[sig]))
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered
         * properly again. */

        if (s->ratelimited)
                d = &s->event->monotonic;
        else if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(d = event_get_clock_data(s->event, s->type));
        else
                return; /* no-op for an event source which is neither a timer nor ratelimited. */

        prioq_reshuffle(d->earliest, s, &s->earliest_index);
        prioq_reshuffle(d->latest, s, &s->latest_index);
        d->needs_rearm = true;
}

static void event_source_time_prioq_remove(
                sd_event_source *s,
                struct clock_data *d) {

        assert(s);
        assert(d);

        prioq_remove(d->earliest, s, &s->earliest_index);
        prioq_remove(d->latest, s, &s->latest_index);
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;
        int r;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                /* Only remove this event source from the time event source here if it is not ratelimited. If
                 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
                 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */

                if (!s->ratelimited) {
                        struct clock_data *d;
                        assert_se(d = event_get_clock_data(s->event, s->type));
                        event_source_time_prioq_remove(s, d);
                }

                break;

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                        if (s->signal.unblock) {
                                sigset_t new_ss;

                                if (sigemptyset(&new_ss) < 0)
                                        log_debug_errno(errno, "Failed to reset signal set, ignoring: %m");
                                else if (sigaddset(&new_ss, s->signal.sig) < 0)
                                        log_debug_errno(errno, "Failed to add signal %i to signal mask, ignoring: %m", s->signal.sig);
                                else {
                                        r = pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);
                                        if (r != 0)
                                                log_debug_errno(r, "Failed to unblock signal %i, ignoring: %m", s->signal.sig);
                                }
                        }
                }

                break;

        case SOURCE_CHILD:
                if (event_pid_changed(s->event))
                        s->child.process_owned = false;

                if (s->child.pid > 0) {
                        if (event_source_is_online(s)) {
                                assert(s->event->n_online_child_sources > 0);
                                s->event->n_online_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached();
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        if (s->ratelimited)
                event_source_time_prioq_remove(s, &s->event->monotonic);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static sd_event_source* source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        return mfree(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 1;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
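
/* Illustrative sketch (not part of this file): watching a socket fd for readability. The fd and
 * handler are hypothetical; the fd must stay valid for the lifetime of the source unless
 * sd_event_source_set_io_fd_own() is used to hand ownership to the source:
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     char buf[4096];
 *                     (void) read(fd, buf, sizeof(buf));
 *             }
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, sock_fd, EPOLLIN, on_readable, NULL);
 */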

static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
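
/* For instance, if bootid.qwords[0] ^ bootid.qwords[1] were 0x1234567890abcdef, the perturbation
 * would be that value modulo USEC_PER_MINUTE (60,000,000 us): a fixed per-boot offset in [0, 60s)
 * that all timers on this machine share, while different machines land on different offsets. */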

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -EBADF;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        assert(d);

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}

static int event_source_time_prioq_put(
                sd_event_source *s,
                struct clock_data *d) {

        int r;

        assert(s);
        assert(d);
        assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

        r = prioq_put(d->earliest, s, &s->earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->latest_index);
        if (r < 0) {
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
                s->earliest_index = PRIOQ_IDX_NULL;
                return r;
        }

        d->needs_rearm = true;
        return 0;
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != UINT64_MAX, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        assert_se(d = event_get_clock_data(e, type));

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = event_source_time_prioq_put(s, d);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
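
/* Illustrative sketch (not part of this file): a one-shot timer firing roughly five seconds from
 * now, with 100ms accuracy so the wakeup can be coalesced. The handler name is hypothetical;
 * sd_event_now() and usec_add() are the real helpers used throughout this file:
 *
 *     usec_t now_usec;
 *     assert_se(sd_event_now(e, CLOCK_MONOTONIC, &now_usec) >= 0);
 *     r = sd_event_add_time(e, &timer_source, CLOCK_MONOTONIC,
 *                           usec_add(now_usec, 5 * USEC_PER_SEC), 100 * USEC_PER_MSEC,
 *                           on_timer, NULL);
 */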

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t new_ss;
        bool block_it;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Let's make sure our special flag stays outside of the valid signal range */
        assert_cc(_NSIG < SD_EVENT_SIGNAL_PROCMASK);

        if (sig & SD_EVENT_SIGNAL_PROCMASK) {
                sig &= ~SD_EVENT_SIGNAL_PROCMASK;
                assert_return(SIGNAL_VALID(sig), -EINVAL);

                block_it = true;
        } else {
                assert_return(SIGNAL_VALID(sig), -EINVAL);

                r = signal_is_blocked(sig);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;

                block_it = false;
        }

        if (!callback)
                callback = signal_exit_callback;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        if (block_it) {
                sigset_t old_ss;

                if (sigemptyset(&new_ss) < 0)
                        return -errno;

                if (sigaddset(&new_ss, sig) < 0)
                        return -errno;

                r = pthread_sigmask(SIG_BLOCK, &new_ss, &old_ss);
                if (r != 0)
                        return -r;

                r = sigismember(&old_ss, sig);
                if (r < 0)
                        return -errno;

                s->signal.unblock = !r;
        } else
                s->signal.unblock = false;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0) {
                if (s->signal.unblock)
                        (void) pthread_sigmask(SIG_UNBLOCK, &new_ss, NULL);

                return r;
        }

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
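
/* Illustrative sketch (not part of this file): handling SIGTERM. Ordinarily the caller must have
 * blocked the signal first; ORing in SD_EVENT_SIGNAL_PROCMASK asks sd-event to adjust the calling
 * thread's mask itself. Passing a NULL callback makes the loop exit with the signal number:
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
 */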

static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -EBADF;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;

        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        /* These must be done after everything succeeds. */
        s->child.pid = pid;
        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}
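
/* Illustrative sketch (not part of this file): watching a forked-off child. SIGCHLD must be
 * blocked in all threads before the child is created, otherwise sd_event_add_child() returns
 * -EBUSY. The pid and handler are hypothetical:
 *
 *     sigset_t ss;
 *     assert_se(sigemptyset(&ss) >= 0);
 *     assert_se(sigaddset(&ss, SIGCHLD) >= 0);
 *     assert_se(pthread_sigmask(SIG_BLOCK, &ss, NULL) == 0);
 *     r = sd_event_add_child(e, &child_source, child_pid, WEXITED, on_child_exit, NULL);
 */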

_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}
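
/* Illustrative note: the pidfd variant is preferable when the caller already holds a pidfd (e.g.
 * from pidfd_open() or clone3() with CLONE_PIDFD), since the fd pins the process and makes the
 * PID immune to recycling races; with plain sd_event_add_child() the pidfd is only acquired
 * after the fact, when available. */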

static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
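
/* Illustrative note: a defer source is born pending with SD_EVENT_ONESHOT, so its callback runs
 * once on the next loop iteration and the source is then disabled; re-enable it with
 * sd_event_source_set_enabled(s, SD_EVENT_ONESHOT) to schedule it again. */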

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
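
/* Illustrative sketch (not part of this file): exit sources run only once sd_event_exit() has
 * been called, in priority order, which makes them a natural place for cleanup work. The handler
 * name is hypothetical:
 *
 *     static int on_exit_cleanup(sd_event_source *s, void *userdata) {
 *             // release resources before the loop returns
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, NULL, on_exit_cleanup, NULL);
 */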

static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (!event_pid_changed(e) &&
                    epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -EBADF;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
1837
7a08d314 1838static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
90c88092 1839 int r;
97ef5391
LP
1840
1841 assert(x);
1842 assert(y);
1843
90c88092
YW
1844 r = CMP(x->dev, y->dev);
1845 if (r != 0)
1846 return r;
97ef5391 1847
6dd91b36 1848 return CMP(x->ino, y->ino);
97ef5391
LP
1849}
1850
7a08d314
YW
1851static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1852 assert(d);
1853
1854 siphash24_compress(&d->dev, sizeof(d->dev), state);
1855 siphash24_compress(&d->ino, sizeof(d->ino), state);
1856}
1857
7a08d314 1858DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1859
1860static void event_free_inode_data(
1861 sd_event *e,
1862 struct inode_data *d) {
1863
1864 assert(e);
1865
1866 if (!d)
1867 return;
1868
64903d18 1869 assert(!d->event_sources);
1870
1871 if (d->fd >= 0) {
ed828563 1872 LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
1873 safe_close(d->fd);
1874 }
1875
1876 if (d->inotify_data) {
1877
1878 if (d->wd >= 0) {
fbae5090 1879 if (d->inotify_data->fd >= 0 && !event_pid_changed(e)) {
1880 /* So here's a problem. At the time this runs the watch descriptor might already be
 1881 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
 1882 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it is quite
 1883 * likely to happen. */
1884
1885 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1886 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1887 }
1888
1889 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1890 }
1891
1892 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1893 }
1894
1895 free(d);
1896}
1897
1898static void event_gc_inotify_data(
1899 sd_event *e,
1900 struct inotify_data *d) {
1901
1902 assert(e);
1903
1904 /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch
1905 * any inode with it anymore, which in turn happens if no event source of this priority is interested
1906 * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC
1907 * (under the expectation that the GC is called again once the counter is decremented). */
1908
1909 if (!d)
1910 return;
1911
1912 if (!hashmap_isempty(d->inodes))
1913 return;
1914
1915 if (d->n_busy > 0)
1916 return;
1917
1918 event_free_inotify_data(e, d);
1919}
1920
1921static void event_gc_inode_data(
1922 sd_event *e,
1923 struct inode_data *d) {
1924
1925 struct inotify_data *inotify_data;
1926
1927 assert(e);
1928
1929 if (!d)
1930 return;
1931
64903d18 1932 if (d->event_sources)
1933 return;
1934
1935 inotify_data = d->inotify_data;
1936 event_free_inode_data(e, d);
1937
53baf2ef 1938 event_gc_inotify_data(e, inotify_data);
1939}
1940
1941static int event_make_inode_data(
1942 sd_event *e,
1943 struct inotify_data *inotify_data,
1944 dev_t dev,
1945 ino_t ino,
1946 struct inode_data **ret) {
1947
1948 struct inode_data *d, key;
1949 int r;
1950
1951 assert(e);
1952 assert(inotify_data);
1953
1954 key = (struct inode_data) {
1955 .ino = ino,
1956 .dev = dev,
1957 };
1958
1959 d = hashmap_get(inotify_data->inodes, &key);
1960 if (d) {
1961 if (ret)
1962 *ret = d;
1963
1964 return 0;
1965 }
1966
1967 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1968 if (r < 0)
1969 return r;
1970
1971 d = new(struct inode_data, 1);
1972 if (!d)
1973 return -ENOMEM;
1974
1975 *d = (struct inode_data) {
1976 .dev = dev,
1977 .ino = ino,
1978 .wd = -1,
254d1313 1979 .fd = -EBADF,
1980 .inotify_data = inotify_data,
1981 };
1982
1983 r = hashmap_put(inotify_data->inodes, d, d);
1984 if (r < 0) {
1985 free(d);
1986 return r;
1987 }
1988
1989 if (ret)
1990 *ret = d;
1991
1992 return 1;
1993}
1994
1995static uint32_t inode_data_determine_mask(struct inode_data *d) {
1996 bool excl_unlink = true;
1997 uint32_t combined = 0;
1998
1999 assert(d);
2000
2001 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
2002 * the IN_EXCL_UNLINK flag is ANDed instead.
2003 *
 2004 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
2005 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
f21f31b2 2006 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
2007 * events we don't care for client-side. */
2008
2009 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
2010
2011 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
2012 excl_unlink = false;
2013
2014 combined |= s->inotify.mask;
2015 }
2016
2017 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
2018}
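/* Editorial note: a worked example of the combination rule above (illustrative only).
 * Suppose source A watches with IN_CREATE|IN_EXCL_UNLINK and source B with IN_DELETE:
 *   combined    = IN_CREATE|IN_DELETE|IN_EXCL_UNLINK   (plain OR of all masks)
 *   excl_unlink = false                                (B lacks IN_EXCL_UNLINK; AND semantics)
 * so the kernel watch is realized with IN_CREATE|IN_DELETE, and per-source filtering of
 * unwanted events happens client-side at dispatch time. */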
2019
2020static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
2021 uint32_t combined_mask;
2022 int wd, r;
2023
2024 assert(d);
2025 assert(d->fd >= 0);
2026
2027 combined_mask = inode_data_determine_mask(d);
2028
2029 if (d->wd >= 0 && combined_mask == d->combined_mask)
2030 return 0;
2031
2032 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
2033 if (r < 0)
2034 return r;
2035
2036 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
2037 if (wd < 0)
2038 return -errno;
2039
2040 if (d->wd < 0) {
2041 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
2042 if (r < 0) {
2043 (void) inotify_rm_watch(d->inotify_data->fd, wd);
2044 return r;
2045 }
2046
2047 d->wd = wd;
2048
2049 } else if (d->wd != wd) {
2050
2051 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
2052 (void) inotify_rm_watch(d->fd, wd);
2053 return -EINVAL;
2054 }
2055
2056 d->combined_mask = combined_mask;
2057 return 1;
2058}
2059
2060static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
2061 assert(s);
2062
2063 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
2064}
2065
e67d738a 2066static int event_add_inotify_fd_internal(
2067 sd_event *e,
2068 sd_event_source **ret,
2069 int fd,
2070 bool donate,
2071 uint32_t mask,
2072 sd_event_inotify_handler_t callback,
2073 void *userdata) {
2074
5bb1d7fb 2075 _cleanup_close_ int donated_fd = donate ? fd : -EBADF;
e67d738a 2076 _cleanup_(source_freep) sd_event_source *s = NULL;
2077 struct inotify_data *inotify_data = NULL;
2078 struct inode_data *inode_data = NULL;
2079 struct stat st;
2080 int r;
2081
2082 assert_return(e, -EINVAL);
2083 assert_return(e = event_resolve(e), -ENOPKG);
e67d738a 2084 assert_return(fd >= 0, -EBADF);
2085 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2086 assert_return(!event_pid_changed(e), -ECHILD);
2087
2088 if (!callback)
2089 callback = inotify_exit_callback;
2090
2091 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
2092 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
2093 * the user can't use them for us. */
2094 if (mask & IN_MASK_ADD)
2095 return -EINVAL;
2096
2097 if (fstat(fd, &st) < 0)
2098 return -errno;
2099
2100 s = source_new(e, !ret, SOURCE_INOTIFY);
2101 if (!s)
2102 return -ENOMEM;
2103
2104 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
2105 s->inotify.mask = mask;
2106 s->inotify.callback = callback;
2107 s->userdata = userdata;
2108
2109 /* Allocate an inotify object for this priority, and an inode object within it */
2110 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
2111 if (r < 0)
8c75fe17 2112 return r;
2113
2114 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
8c75fe17 2115 if (r < 0) {
e67d738a 2116 event_gc_inotify_data(e, inotify_data);
2117 return r;
2118 }
2119
 2120 /* Keep the O_PATH fd around until the first iteration of the loop, so that the priority of the
 2121 * event source can still be changed until then, for which we need the original inode. */
2122 if (inode_data->fd < 0) {
2123 if (donated_fd >= 0)
2124 inode_data->fd = TAKE_FD(donated_fd);
2125 else {
2126 inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2127 if (inode_data->fd < 0) {
2128 r = -errno;
2129 event_gc_inode_data(e, inode_data);
2130 return r;
2131 }
2132 }
2133
ed828563 2134 LIST_PREPEND(to_close, e->inode_data_to_close_list, inode_data);
2135 }
2136
2137 /* Link our event source to the inode data object */
2138 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
2139 s->inotify.inode_data = inode_data;
2140
2141 /* Actually realize the watch now */
2142 r = inode_data_realize_watch(e, inode_data);
2143 if (r < 0)
8c75fe17 2144 return r;
97ef5391 2145
2146 if (ret)
2147 *ret = s;
8c75fe17 2148 TAKE_PTR(s);
2149
2150 return 0;
2151}
2152
2153_public_ int sd_event_add_inotify_fd(
2154 sd_event *e,
2155 sd_event_source **ret,
2156 int fd,
2157 uint32_t mask,
2158 sd_event_inotify_handler_t callback,
2159 void *userdata) {
2160
2161 return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
2162}
2163
2164_public_ int sd_event_add_inotify(
2165 sd_event *e,
2166 sd_event_source **ret,
2167 const char *path,
2168 uint32_t mask,
2169 sd_event_inotify_handler_t callback,
2170 void *userdata) {
2171
2091c779 2172 sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */
2173 int fd, r;
2174
2175 assert_return(path, -EINVAL);
2176
2177 fd = open(path, O_PATH | O_CLOEXEC |
2178 (mask & IN_ONLYDIR ? O_DIRECTORY : 0) |
2179 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
2180 if (fd < 0)
2181 return -errno;
2182
2183 r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);
2184 if (r < 0)
2185 return r;
2186
2187 (void) sd_event_source_set_description(s, path);
2188
2189 if (ret)
2190 *ret = s;
2191
2192 return r;
2193}
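/* Editorial note: a hedged usage sketch for sd_event_add_inotify() above (not part of
 * this file; assumes <systemd/sd-event.h>, <sys/inotify.h> and <stdio.h>; the handler
 * name and path are hypothetical). */
#if 0
static int on_dir_change(sd_event_source *s, const struct inotify_event *event, void *userdata) {
        /* For directory watches, event->len > 0 means event->name holds the child name. */
        if (event->len > 0)
                printf("changed: %s\n", event->name);
        return 0;
}

static int watch_tmp(sd_event *e) {
        /* Watches on the same inode are coalesced into one kernel watch, as implemented above. */
        return sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE|IN_DELETE, on_dir_change, NULL);
}
#endif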
2194
8301aa0b 2195static sd_event_source* event_source_free(sd_event_source *s) {
2196 if (!s)
2197 return NULL;
da7e457c 2198
2199 /* Here's a special hack: when we are called from a
2200 * dispatch handler we won't free the event source
2201 * immediately, but we will detach the fd from the
2202 * epoll. This way it is safe for the caller to unref
2203 * the event source and immediately close the fd, but
2204 * we still retain a valid event source object after
2205 * the callback. */
fd38203a 2206
76d04c3a 2207 if (s->dispatching)
8301aa0b 2208 source_disconnect(s);
76d04c3a 2209 else
8301aa0b 2210 source_free(s);
2211
2212 return NULL;
2213}
2214
2215DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
2216
356779df 2217_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
f7f53e9e 2218 assert_return(s, -EINVAL);
f4b2933e 2219 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 2220
356779df 2221 return free_and_strdup(&s->description, description);
2222}
2223
356779df 2224_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
f7f53e9e 2225 assert_return(s, -EINVAL);
356779df 2226 assert_return(description, -EINVAL);
f4b2933e 2227 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 2228
2229 if (!s->description)
2230 return -ENXIO;
2231
356779df 2232 *description = s->description;
2233 return 0;
2234}
2235
adcc4ca3 2236_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
305f78bf 2237 assert_return(s, NULL);
2238
2239 return s->event;
2240}
2241
f7262a9f 2242_public_ int sd_event_source_get_pending(sd_event_source *s) {
305f78bf 2243 assert_return(s, -EINVAL);
6203e07a 2244 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c 2245 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2246 assert_return(!event_pid_changed(s->event), -ECHILD);
2247
2248 return s->pending;
2249}
2250
f7262a9f 2251_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
2252 assert_return(s, -EINVAL);
2253 assert_return(s->type == SOURCE_IO, -EDOM);
2254 assert_return(!event_pid_changed(s->event), -ECHILD);
2255
2256 return s->io.fd;
2257}
2258
2259_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2260 int r;
2261
2262 assert_return(s, -EINVAL);
8ac43fee 2263 assert_return(fd >= 0, -EBADF);
2264 assert_return(s->type == SOURCE_IO, -EDOM);
2265 assert_return(!event_pid_changed(s->event), -ECHILD);
2266
2267 if (s->io.fd == fd)
2268 return 0;
2269
b6d5481b 2270 if (event_source_is_offline(s)) {
2271 s->io.fd = fd;
2272 s->io.registered = false;
2273 } else {
2274 int saved_fd;
2275
2276 saved_fd = s->io.fd;
2277 assert(s->io.registered);
2278
2279 s->io.fd = fd;
2280 s->io.registered = false;
2281
2282 r = source_io_register(s, s->enabled, s->io.events);
2283 if (r < 0) {
2284 s->io.fd = saved_fd;
2285 s->io.registered = true;
2286 return r;
2287 }
2288
5a795bff 2289 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2290 }
2291
2292 return 0;
2293}
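/* Editorial note: an illustrative reconnect pattern for the setter above (hypothetical
 * names; error handling elided). The source keeps its callback, priority and enablement;
 * only the fd registered in the epoll changes. Note that, as implemented above, the old
 * fd is deregistered but not closed, so the caller remains responsible for it. */
#if 0
static int swap_connection(sd_event_source *s, int old_fd, int new_fd) {
        int r;

        r = sd_event_source_set_io_fd(s, new_fd);
        if (r < 0)
                return r;

        safe_close(old_fd); /* our responsibility, not the event source's */
        return 0;
}
#endif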
2294
2295_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2296 assert_return(s, -EINVAL);
2297 assert_return(s->type == SOURCE_IO, -EDOM);
2298
2299 return s->io.owned;
2300}
2301
2302_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2303 assert_return(s, -EINVAL);
2304 assert_return(s->type == SOURCE_IO, -EDOM);
2305
2306 s->io.owned = own;
2307 return 0;
2308}
2309
f7262a9f 2310_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2311 assert_return(s, -EINVAL);
2312 assert_return(events, -EINVAL);
2313 assert_return(s->type == SOURCE_IO, -EDOM);
2314 assert_return(!event_pid_changed(s->event), -ECHILD);
2315
2316 *events = s->io.events;
2317 return 0;
2318}
2319
f7262a9f 2320_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2321 int r;
2322
2323 assert_return(s, -EINVAL);
2324 assert_return(s->type == SOURCE_IO, -EDOM);
2a16a986 2325 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
da7e457c 2326 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2327 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2328
2329 /* edge-triggered updates are never skipped, so we can reset edges */
2330 if (s->io.events == events && !(events & EPOLLET))
2331 return 0;
2332
2333 r = source_set_pending(s, false);
2334 if (r < 0)
2335 return r;
2336
b6d5481b 2337 if (event_source_is_online(s)) {
e4715127 2338 r = source_io_register(s, s->enabled, events);
2339 if (r < 0)
2340 return r;
2341 }
2342
2343 s->io.events = events;
2344
2345 return 0;
2346}
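/* Editorial note: an illustrative pattern for sd_event_source_set_io_events() above.
 * A common idiom is to subscribe to EPOLLOUT only while a write queue is non-empty, so
 * an always-writable socket does not busy-wake the loop. Names are hypothetical. */
#if 0
static int update_io_interest(sd_event_source *s, bool have_pending_writes) {
        return sd_event_source_set_io_events(s, EPOLLIN | (have_pending_writes ? EPOLLOUT : 0));
}
#endif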
2347
f7262a9f 2348_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2349 assert_return(s, -EINVAL);
2350 assert_return(revents, -EINVAL);
2351 assert_return(s->type == SOURCE_IO, -EDOM);
2352 assert_return(s->pending, -ENODATA);
2353 assert_return(!event_pid_changed(s->event), -ECHILD);
2354
2355 *revents = s->io.revents;
2356 return 0;
2357}
2358
f7262a9f 2359_public_ int sd_event_source_get_signal(sd_event_source *s) {
2360 assert_return(s, -EINVAL);
2361 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2362 assert_return(!event_pid_changed(s->event), -ECHILD);
2363
2364 return s->signal.sig;
2365}
2366
31927c16 2367_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2368 assert_return(s, -EINVAL);
2369 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2370
2371 *priority = s->priority;
2372 return 0;
2373}
2374
31927c16 2375_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2376 bool rm_inotify = false, rm_inode = false;
2377 struct inotify_data *new_inotify_data = NULL;
2378 struct inode_data *new_inode_data = NULL;
2379 int r;
2380
305f78bf 2381 assert_return(s, -EINVAL);
da7e457c 2382 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2383 assert_return(!event_pid_changed(s->event), -ECHILD);
2384
2385 if (s->priority == priority)
2386 return 0;
2387
2388 if (s->type == SOURCE_INOTIFY) {
2389 struct inode_data *old_inode_data;
2390
2391 assert(s->inotify.inode_data);
2392 old_inode_data = s->inotify.inode_data;
2393
 2394 /* We need the original fd to change the priority. If we don't have it, we cannot change the
 2395 * priority anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2396 * events we allow priority changes only until the first following iteration. */
2397 if (old_inode_data->fd < 0)
2398 return -EOPNOTSUPP;
2399
2400 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2401 if (r < 0)
2402 return r;
2403 rm_inotify = r > 0;
2404
2405 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2406 if (r < 0)
2407 goto fail;
2408 rm_inode = r > 0;
2409
2410 if (new_inode_data->fd < 0) {
2411 /* Duplicate the fd for the new inode object if we don't have any yet */
2412 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2413 if (new_inode_data->fd < 0) {
2414 r = -errno;
2415 goto fail;
2416 }
2417
ed828563 2418 LIST_PREPEND(to_close, s->event->inode_data_to_close_list, new_inode_data);
2419 }
2420
2421 /* Move the event source to the new inode data structure */
2422 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2423 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2424 s->inotify.inode_data = new_inode_data;
2425
2426 /* Now create the new watch */
2427 r = inode_data_realize_watch(s->event, new_inode_data);
2428 if (r < 0) {
2429 /* Move it back */
2430 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2431 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2432 s->inotify.inode_data = old_inode_data;
2433 goto fail;
2434 }
2435
2436 s->priority = priority;
2437
2438 event_gc_inode_data(s->event, old_inode_data);
2439
b6d5481b 2440 } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
2441 struct signal_data *old, *d;
2442
2443 /* Move us from the signalfd belonging to the old
2444 * priority to the signalfd of the new priority */
2445
2446 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2447
2448 s->priority = priority;
2449
2450 r = event_make_signal_data(s->event, s->signal.sig, &d);
2451 if (r < 0) {
2452 s->priority = old->priority;
2453 return r;
2454 }
2455
2456 event_unmask_signal_data(s->event, old, s->signal.sig);
2457 } else
2458 s->priority = priority;
fd38203a 2459
e1951c16 2460 event_source_pp_prioq_reshuffle(s);
fd38203a 2461
2462 if (s->type == SOURCE_EXIT)
2463 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf 2464
fd38203a 2465 return 0;
2466
2467fail:
2468 if (rm_inode)
2469 event_free_inode_data(s->event, new_inode_data);
2470
2471 if (rm_inotify)
2472 event_free_inotify_data(s->event, new_inotify_data);
2473
2474 return r;
2475}
2476
cad143a8 2477_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
2478 /* Quick mode: the event source doesn't exist and we only want to query boolean enablement state. */
2479 if (!s && !ret)
2480 return false;
2481
305f78bf 2482 assert_return(s, -EINVAL);
305f78bf 2483 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2484
2485 if (ret)
2486 *ret = s->enabled;
2487
08c1eb0e 2488 return s->enabled != SD_EVENT_OFF;
2489}
2490
2491static int event_source_offline(
2492 sd_event_source *s,
2493 int enabled,
2494 bool ratelimited) {
2495
2496 bool was_offline;
2497 int r;
2498
ddfde737 2499 assert(s);
b6d5481b 2500 assert(enabled == SD_EVENT_OFF || ratelimited);
fd38203a 2501
ddfde737 2502 /* Unset the pending flag when this event source is disabled */
2503 if (s->enabled != SD_EVENT_OFF &&
2504 enabled == SD_EVENT_OFF &&
2505 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2506 r = source_set_pending(s, false);
2507 if (r < 0)
2508 return r;
2509 }
cc567911 2510
2511 was_offline = event_source_is_offline(s);
2512 s->enabled = enabled;
2513 s->ratelimited = ratelimited;
fd38203a 2514
ddfde737 2515 switch (s->type) {
fd38203a 2516
2517 case SOURCE_IO:
2518 source_io_unregister(s);
2519 break;
ac989a78 2520
2521 case SOURCE_SIGNAL:
2522 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2523 break;
fd38203a 2524
ddfde737 2525 case SOURCE_CHILD:
2526 if (!was_offline) {
2527 assert(s->event->n_online_child_sources > 0);
2528 s->event->n_online_child_sources--;
2529 }
fd38203a 2530
2531 if (EVENT_SOURCE_WATCH_PIDFD(s))
2532 source_child_pidfd_unregister(s);
2533 else
2534 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2535 break;
4807d2d0 2536
2537 case SOURCE_EXIT:
2538 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2539 break;
fd38203a 2540
2541 case SOURCE_TIME_REALTIME:
2542 case SOURCE_TIME_BOOTTIME:
2543 case SOURCE_TIME_MONOTONIC:
2544 case SOURCE_TIME_REALTIME_ALARM:
2545 case SOURCE_TIME_BOOTTIME_ALARM:
2546 case SOURCE_DEFER:
2547 case SOURCE_POST:
2548 case SOURCE_INOTIFY:
2549 break;
fd38203a 2550
ddfde737 2551 default:
04499a70 2552 assert_not_reached();
ddfde737 2553 }
fd38203a 2554
2555 /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
2556 event_source_time_prioq_reshuffle(s);
2557
b6d5481b 2558 return 1;
ddfde737 2559}
f8f3f926 2560
2561static int event_source_online(
2562 sd_event_source *s,
2563 int enabled,
2564 bool ratelimited) {
2565
2566 bool was_online;
ddfde737 2567 int r;
fd38203a 2568
ddfde737 2569 assert(s);
b6d5481b 2570 assert(enabled != SD_EVENT_OFF || !ratelimited);
305f78bf 2571
ddfde737 2572 /* Unset the pending flag when this event source is enabled */
2573 if (s->enabled == SD_EVENT_OFF &&
2574 enabled != SD_EVENT_OFF &&
2575 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2576 r = source_set_pending(s, false);
2577 if (r < 0)
2578 return r;
2579 }
9d3e3aa5 2580
2581 /* Are we really ready for onlining? */
2582 if (enabled == SD_EVENT_OFF || ratelimited) {
2583 /* Nope, we are not ready for onlining, then just update the precise state and exit */
2584 s->enabled = enabled;
2585 s->ratelimited = ratelimited;
2586 return 0;
2587 }
2588
2589 was_online = event_source_is_online(s);
2590
ddfde737 2591 switch (s->type) {
ddfde737 2592 case SOURCE_IO:
b6d5481b 2593 r = source_io_register(s, enabled, s->io.events);
d2eafe61 2594 if (r < 0)
ddfde737 2595 return r;
ddfde737 2596 break;
fd38203a 2597
2598 case SOURCE_SIGNAL:
2599 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2600 if (r < 0) {
2601 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2602 return r;
2603 }
fd38203a 2604
ddfde737 2605 break;
fd38203a 2606
ddfde737 2607 case SOURCE_CHILD:
2608 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2609 /* yes, we have pidfd */
9da4cb2b 2610
b6d5481b 2611 r = source_child_pidfd_register(s, enabled);
ac9f2640 2612 if (r < 0)
9da4cb2b 2613 return r;
2614 } else {
2615 /* no pidfd, or something other to watch for than WEXITED */
9da4cb2b 2616
2617 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2618 if (r < 0) {
2619 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2620 return r;
2621 }
2622 }
fd38203a 2623
2624 if (!was_online)
2625 s->event->n_online_child_sources++;
ddfde737 2626 break;
4807d2d0 2627
2628 case SOURCE_TIME_REALTIME:
2629 case SOURCE_TIME_BOOTTIME:
2630 case SOURCE_TIME_MONOTONIC:
2631 case SOURCE_TIME_REALTIME_ALARM:
2632 case SOURCE_TIME_BOOTTIME_ALARM:
ddfde737 2633 case SOURCE_EXIT:
2634 case SOURCE_DEFER:
2635 case SOURCE_POST:
2636 case SOURCE_INOTIFY:
2637 break;
9da4cb2b 2638
ddfde737 2639 default:
04499a70 2640 assert_not_reached();
ddfde737 2641 }
f8f3f926 2642
2643 s->enabled = enabled;
2644 s->ratelimited = ratelimited;
2645
2646 /* Non-failing operations below */
2115b9b6 2647 if (s->type == SOURCE_EXIT)
d2eafe61 2648 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
d2eafe61 2649
2650 /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
2651 event_source_time_prioq_reshuffle(s);
d2eafe61 2652
b6d5481b 2653 return 1;
2654}
2655
2656_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2657 int r;
9da4cb2b 2658
ddfde737 2659 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2660
2661 /* Quick mode: if the source doesn't exist, SD_EVENT_OFF is a noop. */
2662 if (m == SD_EVENT_OFF && !s)
2663 return 0;
2664
2665 assert_return(s, -EINVAL);
ddfde737 2666 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2667
2668 /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
2669 if (s->event->state == SD_EVENT_FINISHED)
2670 return m == SD_EVENT_OFF ? 0 : -ESTALE;
305f78bf 2671
2672 if (s->enabled == m) /* No change? */
2673 return 0;
9d3e3aa5 2674
ddfde737 2675 if (m == SD_EVENT_OFF)
b6d5481b 2676 r = event_source_offline(s, m, s->ratelimited);
2677 else {
2678 if (s->enabled != SD_EVENT_OFF) {
2679 /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
2680 * event source is already enabled after all. */
2681 s->enabled = m;
2682 return 0;
fd38203a 2683 }
ddfde737 2684
b6d5481b 2685 r = event_source_online(s, m, s->ratelimited);
fd38203a 2686 }
2687 if (r < 0)
2688 return r;
fd38203a 2689
e1951c16 2690 event_source_pp_prioq_reshuffle(s);
2691 return 0;
2692}
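/* Editorial note: a hedged sketch of the ONESHOT lifecycle managed by the function
 * above: a ONESHOT source is switched to SD_EVENT_OFF before its callback runs, and
 * the callback may re-arm it for exactly one more dispatch. Handler is hypothetical. */
#if 0
static int on_ready(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* ... consume exactly one event ... */
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT); /* re-arm once */
}
#endif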
2693
f7262a9f 2694_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2695 assert_return(s, -EINVAL);
2696 assert_return(usec, -EINVAL);
6a0f1f6d 2697 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf 2698 assert_return(!event_pid_changed(s->event), -ECHILD);
2699
2700 *usec = s->time.next;
2701 return 0;
2702}
2703
f7262a9f 2704_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2a0dc6cd 2705 int r;
6a0f1f6d 2706
305f78bf 2707 assert_return(s, -EINVAL);
6a0f1f6d 2708 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2709 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2710 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2711
2712 r = source_set_pending(s, false);
2713 if (r < 0)
2714 return r;
2576a19e 2715
2a0dc6cd 2716 s->time.next = usec;
fd38203a 2717
e1951c16 2718 event_source_time_prioq_reshuffle(s);
2719 return 0;
2720}
2721
2722_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2723 usec_t t;
2724 int r;
2725
2726 assert_return(s, -EINVAL);
2727 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2728
2729 if (usec == USEC_INFINITY)
2730 return sd_event_source_set_time(s, USEC_INFINITY);
2731
2732 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2733 if (r < 0)
2734 return r;
2735
2736 usec = usec_add(t, usec);
2737 if (usec == USEC_INFINITY)
2738 return -EOVERFLOW;
2739
496db330 2740 return sd_event_source_set_time(s, usec);
2741}
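/* Editorial note: an illustrative periodic timer built on the relative setter above
 * (not part of this file; assumes <systemd/sd-event.h>). Re-arming from the callback
 * with a relative deadline plus SD_EVENT_ONESHOT yields a simple repeating timer. */
#if 0
static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
        int r;

        r = sd_event_source_set_time_relative(s, 5 * USEC_PER_SEC); /* fire again in 5s */
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}
#endif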
2742
f7262a9f 2743_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2744 assert_return(s, -EINVAL);
2745 assert_return(usec, -EINVAL);
6a0f1f6d 2746 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2747 assert_return(!event_pid_changed(s->event), -ECHILD);
2748
2749 *usec = s->time.accuracy;
2750 return 0;
2751}
2752
f7262a9f 2753_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2a0dc6cd 2754 int r;
6a0f1f6d 2755
305f78bf 2756 assert_return(s, -EINVAL);
f5fbe71d 2757 assert_return(usec != UINT64_MAX, -EINVAL);
6a0f1f6d 2758 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2759 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2760 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2761
2762 r = source_set_pending(s, false);
2763 if (r < 0)
2764 return r;
2765
2766 if (usec == 0)
2767 usec = DEFAULT_ACCURACY_USEC;
2768
2769 s->time.accuracy = usec;
2770
e1951c16 2771 event_source_time_prioq_reshuffle(s);
2772 return 0;
2773}
2774
2775_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2776 assert_return(s, -EINVAL);
2777 assert_return(clock, -EINVAL);
2778 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2779 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2780
6a0f1f6d 2781 *clock = event_source_type_to_clock(s->type);
2782 return 0;
2783}
2784
f7262a9f 2785_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2786 assert_return(s, -EINVAL);
2787 assert_return(pid, -EINVAL);
2788 assert_return(s->type == SOURCE_CHILD, -EDOM);
2789 assert_return(!event_pid_changed(s->event), -ECHILD);
2790
2791 *pid = s->child.pid;
2792 return 0;
2793}
2794
2795_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2796 assert_return(s, -EINVAL);
2797 assert_return(s->type == SOURCE_CHILD, -EDOM);
2798 assert_return(!event_pid_changed(s->event), -ECHILD);
2799
2800 if (s->child.pidfd < 0)
2801 return -EOPNOTSUPP;
2802
2803 return s->child.pidfd;
2804}
2805
2806_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2807 assert_return(s, -EINVAL);
2808 assert_return(s->type == SOURCE_CHILD, -EDOM);
2809 assert_return(!event_pid_changed(s->event), -ECHILD);
2810 assert_return(SIGNAL_VALID(sig), -EINVAL);
2811
2812 /* If we already have seen indication the process exited refuse sending a signal early. This way we
2813 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2814 * available. */
2815 if (s->child.exited)
2816 return -ESRCH;
2817
2818 if (s->child.pidfd >= 0) {
2819 siginfo_t copy;
2820
2821 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2822 * structure here */
2823 if (si)
2824 copy = *si;
2825
2826 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2827 /* Let's propagate the error only if the system call is not implemented or prohibited */
2828 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2829 return -errno;
2830 } else
2831 return 0;
2832 }
2833
2834 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2835 * this here. */
2836 if (flags != 0)
2837 return -EOPNOTSUPP;
2838
2839 if (si) {
2840 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2841 siginfo_t copy = *si;
2842
2843 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2844 return -errno;
2845 } else if (kill(s->child.pid, sig) < 0)
2846 return -errno;
2847
2848 return 0;
2849}
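/* Editorial note: a minimal usage sketch for the function above (hypothetical wrapper).
 * Going through the event source instead of a raw kill() avoids signalling a recycled
 * PID: the pidfd is preferred when available, and an already-exited child fails with
 * -ESRCH before any signal is sent. */
#if 0
static int stop_child(sd_event_source *child_source) {
        return sd_event_source_send_child_signal(child_source, SIGTERM, /* si= */ NULL, /* flags= */ 0);
}
#endif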
2850
2851_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2852 assert_return(s, -EINVAL);
2853 assert_return(s->type == SOURCE_CHILD, -EDOM);
2854
2855 if (s->child.pidfd < 0)
2856 return -EOPNOTSUPP;
2857
2858 return s->child.pidfd_owned;
2859}
2860
2861_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2862 assert_return(s, -EINVAL);
2863 assert_return(s->type == SOURCE_CHILD, -EDOM);
2864
2865 if (s->child.pidfd < 0)
2866 return -EOPNOTSUPP;
2867
2868 s->child.pidfd_owned = own;
2869 return 0;
2870}
2871
2872_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2873 assert_return(s, -EINVAL);
2874 assert_return(s->type == SOURCE_CHILD, -EDOM);
2875
2876 return s->child.process_owned;
2877}
2878
2879_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2880 assert_return(s, -EINVAL);
2881 assert_return(s->type == SOURCE_CHILD, -EDOM);
2882
2883 s->child.process_owned = own;
2884 return 0;
2885}
2886
2887_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2888 assert_return(s, -EINVAL);
2889 assert_return(mask, -EINVAL);
2890 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2891 assert_return(!event_pid_changed(s->event), -ECHILD);
2892
2893 *mask = s->inotify.mask;
2894 return 0;
2895}
2896
718db961 2897_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2898 int r;
2899
da7e457c 2900 assert_return(s, -EINVAL);
6203e07a 2901 assert_return(s->type != SOURCE_EXIT, -EDOM);
2902 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2903 assert_return(!event_pid_changed(s->event), -ECHILD);
2904
2905 if (s->prepare == callback)
2906 return 0;
2907
2908 if (callback && s->prepare) {
2909 s->prepare = callback;
2910 return 0;
2911 }
2912
2913 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2914 if (r < 0)
2915 return r;
2916
2917 s->prepare = callback;
2918
2919 if (callback) {
2920 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2921 if (r < 0)
2922 return r;
2923 } else
2924 prioq_remove(s->event->prepare, s, &s->prepare_index);
2925
2926 return 0;
2927}
2928
f7262a9f 2929_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
da7e457c 2930 assert_return(s, NULL);
2931
2932 return s->userdata;
2933}
2934
2935_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2936 void *ret;
2937
2938 assert_return(s, NULL);
2939
2940 ret = s->userdata;
2941 s->userdata = userdata;
2942
2943 return ret;
2944}
2945
2946static int event_source_enter_ratelimited(sd_event_source *s) {
2947 int r;
2948
2949 assert(s);
2950
2951 /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
2952 * the end of the rate limit time window, much as if it was a timer event source. */
2953
2954 if (s->ratelimited)
2955 return 0; /* Already ratelimited, this is a NOP hence */
2956
2957 /* Make sure we can install a CLOCK_MONOTONIC event further down. */
2958 r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
2959 if (r < 0)
2960 return r;
2961
2962 /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
2963 * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
2964 * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
2965 if (EVENT_SOURCE_IS_TIME(s->type))
2966 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2967
2968 /* Now, let's add the event source to the monotonic clock instead */
2969 r = event_source_time_prioq_put(s, &s->event->monotonic);
2970 if (r < 0)
2971 goto fail;
2972
2973 /* And let's take the event source officially offline */
2974 r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
2975 if (r < 0) {
2976 event_source_time_prioq_remove(s, &s->event->monotonic);
2977 goto fail;
2978 }
2979
2980 event_source_pp_prioq_reshuffle(s);
2981
2982 log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
2983 return 0;
2984
2985fail:
2986 /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
2987 * space for it should already be allocated. */
2988 if (EVENT_SOURCE_IS_TIME(s->type))
2989 assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);
2990
2991 return r;
2992}
2993
fd69f224 2994static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {
2995 int r;
2996
2997 assert(s);
2998
2999 if (!s->ratelimited)
3000 return 0;
3001
3002 /* Let's take the event source out of the monotonic prioq first. */
3003 event_source_time_prioq_remove(s, &s->event->monotonic);
3004
3005 /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
3006 if (EVENT_SOURCE_IS_TIME(s->type)) {
3007 r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
3008 if (r < 0)
3009 goto fail;
3010 }
3011
3012 /* Let's try to take it online again. */
3013 r = event_source_online(s, s->enabled, /* ratelimited= */ false);
3014 if (r < 0) {
3015 /* Do something roughly sensible when this failed: undo the two prioq ops above */
3016 if (EVENT_SOURCE_IS_TIME(s->type))
3017 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
3018
3019 goto fail;
3020 }
3021
3022 event_source_pp_prioq_reshuffle(s);
3023 ratelimit_reset(&s->rate_limit);
3024
3025 log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));
3026
3027 if (run_callback && s->ratelimit_expire_callback) {
3028 s->dispatching = true;
3029 r = s->ratelimit_expire_callback(s, s->userdata);
3030 s->dispatching = false;
3031
3032 if (r < 0) {
3033 log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
3034 strna(s->description),
3035 event_source_type_to_string(s->type),
3036 s->exit_on_failure ? "exiting" : "disabling");
3037
3038 if (s->exit_on_failure)
3039 (void) sd_event_exit(s->event, r);
3040 }
3041
3042 if (s->n_ref == 0)
3043 source_free(s);
3044 else if (r < 0)
0a040e64 3045 assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
3046
3047 return 1;
3048 }
3049
3050 return 0;
3051
3052fail:
3053 /* Do something somewhat reasonable when we cannot move an event sources out of ratelimited mode:
3054 * simply put it back in it, maybe we can then process it more successfully next iteration. */
3055 assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);
3056
3057 return r;
3058}
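/* Editorial note: the public entry point to the rate-limit machinery above is
 * sd_event_source_set_ratelimit(). A hedged usage sketch: allow at most 10 dispatches
 * per second; beyond that the source is taken offline and parked in the CLOCK_MONOTONIC
 * prioq until its window ends, as implemented above. */
#if 0
static int throttle_source(sd_event_source *s) {
        return sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, /* burst= */ 10);
}
#endif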
3059
3060static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
3061 usec_t c;
3062 assert(e);
3063 assert(a <= b);
3064
3065 if (a <= 0)
3066 return 0;
3067 if (a >= USEC_INFINITY)
3068 return USEC_INFINITY;
3069
3070 if (b <= a + 1)
3071 return a;
3072
3073 initialize_perturb(e);
3074
3075 /*
3076 Find a good time to wake up again between times a and b. We
3077 have two goals here:
3078
3079 a) We want to wake up as seldom as possible, hence prefer
3080 later times over earlier times.
3081
3082 b) But if we have to wake up, then let's make sure to
3083 dispatch as much as possible on the entire system.
3084
3085 We implement this by waking up everywhere at the same time
850516e0 3086 within any given minute if we can, synchronised via the
c2ba3ad6 3087 perturbation value determined from the boot ID. If we can't,
 3088 then we try to find the same spot within every 10s window, then every 1s
 3089 and then every 250ms window. Otherwise, we pick the last possible time
3090 to wake up.
3091 */
3092
3093 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
3094 if (c >= b) {
3095 if (_unlikely_(c < USEC_PER_MINUTE))
3096 return b;
3097
3098 c -= USEC_PER_MINUTE;
3099 }
3100
3101 if (c >= a)
3102 return c;
3103
3104 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
3105 if (c >= b) {
3106 if (_unlikely_(c < USEC_PER_SEC*10))
3107 return b;
3108
3109 c -= USEC_PER_SEC*10;
3110 }
3111
3112 if (c >= a)
3113 return c;
3114
3115 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
3116 if (c >= b) {
3117 if (_unlikely_(c < USEC_PER_SEC))
3118 return b;
3119
3120 c -= USEC_PER_SEC;
3121 }
3122
3123 if (c >= a)
3124 return c;
3125
3126 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
3127 if (c >= b) {
3128 if (_unlikely_(c < USEC_PER_MSEC*250))
3129 return b;
3130
3131 c -= USEC_PER_MSEC*250;
3132 }
3133
3134 if (c >= a)
3135 return c;
3136
3137 return b;
3138}
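/* Editorial note: a worked example of the coalescing above, with illustrative numbers.
 * Suppose perturb = 15s. For a = 12:00:07 and b = 12:00:52, the minute step yields
 * c = 12:00:00 + 15s = 12:00:15, which lies within [a, b], so 12:00:15 is returned.
 * If instead a = 12:00:20, the minute candidate 12:00:15 < a, so the 10s step is tried:
 * c = 12:00:50 + (15s mod 10s) = 12:00:55 >= b, minus 10s gives 12:00:45 >= a, which is
 * returned. Since every loop on the host derives the same perturb from the boot ID,
 * their wakeups line up on these same instants. */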
3139
3140static int event_arm_timer(
3141 sd_event *e,
6a0f1f6d 3142 struct clock_data *d) {
3143
3144 struct itimerspec its = {};
3145 sd_event_source *a, *b;
3146 usec_t t;
fd38203a 3147
cde93897 3148 assert(e);
6a0f1f6d 3149 assert(d);
fd38203a 3150
d06441da 3151 if (!d->needs_rearm)
212bbb17 3152 return 0;
3153
3154 d->needs_rearm = false;
212bbb17 3155
6a0f1f6d 3156 a = prioq_peek(d->earliest);
19947509 3157 assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
b6d5481b 3158 if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {
72aedc1e 3159
6a0f1f6d 3160 if (d->fd < 0)
3161 return 0;
3162
3a43da28 3163 if (d->next == USEC_INFINITY)
3164 return 0;
3165
3166 /* disarm */
3167 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3168 return -errno;
72aedc1e 3169
3a43da28 3170 d->next = USEC_INFINITY;
fd38203a 3171 return 0;
72aedc1e 3172 }
fd38203a 3173
6a0f1f6d 3174 b = prioq_peek(d->latest);
3175 assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
3176 assert(b && b->enabled != SD_EVENT_OFF);
c2ba3ad6 3177
b6d5481b 3178 t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
6a0f1f6d 3179 if (d->next == t)
3180 return 0;
3181
6a0f1f6d 3182 assert_se(d->fd >= 0);
fd38203a 3183
c2ba3ad6 3184 if (t == 0) {
1751bdde 3185 /* We don't want to disarm here, just mean some time looooong ago. */
3186 its.it_value.tv_sec = 0;
3187 its.it_value.tv_nsec = 1;
3188 } else
c2ba3ad6 3189 timespec_store(&its.it_value, t);
fd38203a 3190
15c689d7 3191 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
cde93897 3192 return -errno;
fd38203a 3193
6a0f1f6d 3194 d->next = t;
3195 return 0;
3196}
3197
9a800b56 3198static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
3199 assert(e);
3200 assert(s);
3201 assert(s->type == SOURCE_IO);
3202
3203 /* If the event source was already pending, we just OR in the
3204 * new revents, otherwise we reset the value. The ORing is
3205 * necessary to handle EPOLLONESHOT events properly where
3206 * readability might happen independently of writability, and
3207 * we need to keep track of both */
3208
3209 if (s->pending)
3210 s->io.revents |= revents;
3211 else
3212 s->io.revents = revents;
fd38203a 3213
3214 return source_set_pending(s, true);
3215}
3216
72aedc1e 3217static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
3218 uint64_t x;
3219 ssize_t ss;
3220
3221 assert(e);
da7e457c 3222 assert(fd >= 0);
72aedc1e 3223
305f78bf 3224 assert_return(events == EPOLLIN, -EIO);
3225
3226 ss = read(fd, &x, sizeof(x));
3227 if (ss < 0) {
8add30a0 3228 if (ERRNO_IS_TRANSIENT(errno))
3229 return 0;
3230
3231 return -errno;
3232 }
3233
8d35dae7 3234 if (_unlikely_(ss != sizeof(x)))
3235 return -EIO;
3236
cde93897 3237 if (next)
3a43da28 3238 *next = USEC_INFINITY;
72aedc1e 3239
3240 return 0;
3241}
3242
3243static int process_timer(
3244 sd_event *e,
3245 usec_t n,
6a0f1f6d 3246 struct clock_data *d) {
305f78bf 3247
fd38203a 3248 sd_event_source *s;
fd69f224 3249 bool callback_invoked = false;
3250 int r;
3251
3252 assert(e);
6a0f1f6d 3253 assert(d);
3254
3255 for (;;) {
6a0f1f6d 3256 s = prioq_peek(d->earliest);
3257 assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
3258
3259 if (!s || time_event_source_next(s) > n)
3260 break;
3261
3262 if (s->ratelimited) {
 3263 /* This is an event source whose ratelimit window has ended. Let's turn it on
3264 * again. */
3265 assert(s->ratelimited);
3266
fd69f224 3267 r = event_source_leave_ratelimit(s, /* run_callback */ true);
3268 if (r < 0)
3269 return r;
3270 else if (r == 1)
3271 callback_invoked = true;
3272
3273 continue;
3274 }
3275
3276 if (s->enabled == SD_EVENT_OFF || s->pending)
3277 break;
3278
3279 r = source_set_pending(s, true);
3280 if (r < 0)
3281 return r;
3282
e1951c16 3283 event_source_time_prioq_reshuffle(s);
3284 }
3285
fd69f224 3286 return callback_invoked;
3287}
3288
3289static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
3290 int64_t min_priority = threshold;
3291 bool something_new = false;
fd38203a 3292 sd_event_source *s;
3293 int r;
3294
3295 assert(e);
3296 assert(ret_min_priority);
3297
3298 if (!e->need_process_child) {
3299 *ret_min_priority = min_priority;
3300 return 0;
3301 }
fd38203a 3302
3303 e->need_process_child = false;
3304
3305 /* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait
3306 * for, instead of using P_ALL. This is because we only want to get child information of very
3307 * specific child processes, and not all of them. We might not have processed the SIGCHLD event
 3308 * of a previous invocation and we don't want to maintain an unbounded *per-child* event queue,
 3309 * hence we really don't want anything flushed out of the kernel's queue that we don't care
 3310 * about. Since this is O(n), if you have a lot of processes you probably want
3311 * to handle SIGCHLD yourself.
3312 *
 3313 * We do not reap the children here (by using WNOWAIT); that is only done after the event
3314 * source is dispatched so that the callback still sees the process as a zombie. */
fd38203a 3315
90e74a66 3316 HASHMAP_FOREACH(s, e->child_sources) {
3317 assert(s->type == SOURCE_CHILD);
3318
3319 if (s->priority > threshold)
3320 continue;
3321
3322 if (s->pending)
3323 continue;
3324
b6d5481b 3325 if (event_source_is_offline(s))
fd38203a
LP
3326 continue;
3327
3328 if (s->child.exited)
3329 continue;
3330
3331 if (EVENT_SOURCE_WATCH_PIDFD(s))
3332 /* There's a usable pidfd known for this event source? Then don't waitid() for
3333 * it here */
3334 continue;
3335
fd38203a 3336 zero(s->child.siginfo);
3337 if (waitid(P_PID, s->child.pid, &s->child.siginfo,
3338 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
bfd9bfcc 3339 return negative_errno();
3340
3341 if (s->child.siginfo.si_pid != 0) {
945c2931 3342 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 3343
3344 if (zombie)
3345 s->child.exited = true;
3346
08cd1552 3347 if (!zombie && (s->child.options & WEXITED)) {
3348 /* If the child isn't dead then let's immediately remove the state
3349 * change from the queue, since there's no benefit in leaving it
3350 * queued. */
3351
3352 assert(s->child.options & (WSTOPPED|WCONTINUED));
a5d27871 3353 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
3354 }
3355
3356 r = source_set_pending(s, true);
3357 if (r < 0)
3358 return r;
3359 if (r > 0) {
3360 something_new = true;
3361 min_priority = MIN(min_priority, s->priority);
3362 }
3363 }
3364 }
3365
3366 *ret_min_priority = min_priority;
3367 return something_new;
3368}
3369
3370static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
3371 assert(e);
3372 assert(s);
3373 assert(s->type == SOURCE_CHILD);
3374
3375 if (s->pending)
3376 return 0;
3377
b6d5481b 3378 if (event_source_is_offline(s))
3379 return 0;
3380
3381 if (!EVENT_SOURCE_WATCH_PIDFD(s))
3382 return 0;
3383
3384 zero(s->child.siginfo);
3385 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
3386 return -errno;
3387
3388 if (s->child.siginfo.si_pid == 0)
3389 return 0;
3390
3391 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
3392 s->child.exited = true;
3393
3394 return source_set_pending(s, true);
3395}
3396
efd3be9d 3397static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
3398 int r;
3399
da7e457c 3400 assert(e);
97ef5391 3401 assert(d);
305f78bf 3402 assert_return(events == EPOLLIN, -EIO);
efd3be9d 3403 assert(min_priority);
fd38203a 3404
3405 /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
3406 * sure to recheck the children we watch. This is because we only ever dequeue the first signal
 3407 * per priority, and if we dequeue one, a SIGCHLD might be enqueued later and we wouldn't know,
 3408 * but we might have higher-priority children we care about, hence we need to check that
3409 * explicitly. */
3410
3411 if (sigismember(&d->sigset, SIGCHLD))
3412 e->need_process_child = true;
3413
91c70071 3414 /* If there's already an event source pending for this priority we don't read another */
3415 if (d->current)
3416 return 0;
3417
fd38203a 3418 for (;;) {
0eb2e0e3 3419 struct signalfd_siginfo si;
7057bd99 3420 ssize_t n;
92daebc0 3421 sd_event_source *s = NULL;
fd38203a 3422
9da4cb2b 3423 n = read(d->fd, &si, sizeof(si));
7057bd99 3424 if (n < 0) {
8add30a0 3425 if (ERRNO_IS_TRANSIENT(errno))
efd3be9d 3426 return 0;
3427
3428 return -errno;
3429 }
3430
7057bd99 3431 if (_unlikely_(n != sizeof(si)))
3432 return -EIO;
3433
6eb7c172 3434 assert(SIGNAL_VALID(si.ssi_signo));
7057bd99 3435
3436 if (e->signal_sources)
3437 s = e->signal_sources[si.ssi_signo];
3438 if (!s)
3439 continue;
3440 if (s->pending)
3441 continue;
3442
3443 s->signal.siginfo = si;
3444 d->current = s;
3445
3446 r = source_set_pending(s, true);
3447 if (r < 0)
3448 return r;
3449 if (r > 0 && *min_priority >= s->priority) {
3450 *min_priority = s->priority;
3451 return 1; /* an event source with smaller priority is queued. */
3452 }
9da4cb2b 3453
efd3be9d 3454 return 0;
fd38203a 3455 }
3456}
3457
efd3be9d 3458static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
3459 ssize_t n;
3460
3461 assert(e);
3462 assert(d);
3463
3464 assert_return(revents == EPOLLIN, -EIO);
3465
3466 /* If there's already an event source pending for this priority, don't read another */
3467 if (d->n_pending > 0)
3468 return 0;
3469
3470 /* Is the read buffer non-empty? If so, let's not read more */
3471 if (d->buffer_filled > 0)
3472 return 0;
3473
3474 if (d->priority > threshold)
3475 return 0;
3476
3477 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3478 if (n < 0) {
8add30a0 3479 if (ERRNO_IS_TRANSIENT(errno))
3480 return 0;
3481
3482 return -errno;
3483 }
3484
3485 assert(n > 0);
3486 d->buffer_filled = (size_t) n;
0601b958 3487 LIST_PREPEND(buffered, e->buffered_inotify_data_list, d);
3488
3489 return 1;
3490}
3491
3492static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3493 assert(e);
3494 assert(d);
3495 assert(sz <= d->buffer_filled);
3496
3497 if (sz == 0)
3498 return;
3499
3500 /* Move the rest to the buffer to the front, in order to get things properly aligned again */
3501 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3502 d->buffer_filled -= sz;
3503
3504 if (d->buffer_filled == 0)
0601b958 3505 LIST_REMOVE(buffered, e->buffered_inotify_data_list, d);
3506}
3507
3508static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3509 int r;
3510
3511 assert(e);
3512 assert(d);
3513
3514 /* If there's already an event source pending for this priority, don't read another */
3515 if (d->n_pending > 0)
3516 return 0;
3517
3518 while (d->buffer_filled > 0) {
3519 size_t sz;
3520
3521 /* Let's validate that the event structures are complete */
3522 if (d->buffer_filled < offsetof(struct inotify_event, name))
3523 return -EIO;
3524
3525 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3526 if (d->buffer_filled < sz)
3527 return -EIO;
3528
3529 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3530 struct inode_data *inode_data;
3531
3532 /* The queue overran, let's pass this event to all event sources connected to this inotify
3533 * object */
3534
03677889 3535 HASHMAP_FOREACH(inode_data, d->inodes)
3536 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3537
b6d5481b 3538 if (event_source_is_offline(s))
3539 continue;
3540
3541 r = source_set_pending(s, true);
3542 if (r < 0)
3543 return r;
3544 }
3545 } else {
3546 struct inode_data *inode_data;
3547
3548 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3549 * our watch descriptor table. */
3550 if (d->buffer.ev.mask & IN_IGNORED) {
3551
3552 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3553 if (!inode_data) {
3554 event_inotify_data_drop(e, d, sz);
3555 continue;
3556 }
3557
3558 /* The watch descriptor was removed by the kernel, let's drop it here too */
3559 inode_data->wd = -1;
3560 } else {
3561 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3562 if (!inode_data) {
3563 event_inotify_data_drop(e, d, sz);
3564 continue;
3565 }
3566 }
3567
3568 /* Trigger all event sources that are interested in these events. Also trigger all event
3569 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3570 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3571
b6d5481b 3572 if (event_source_is_offline(s))
3573 continue;
3574
3575 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3576 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3577 continue;
3578
3579 r = source_set_pending(s, true);
3580 if (r < 0)
3581 return r;
3582 }
3583 }
3584
3585 /* Something pending now? If so, let's finish, otherwise let's read more. */
3586 if (d->n_pending > 0)
3587 return 1;
3588 }
3589
3590 return 0;
3591}
3592
3593static int process_inotify(sd_event *e) {
3594 int r, done = 0;
3595
3596 assert(e);
3597
0601b958 3598 LIST_FOREACH(buffered, d, e->buffered_inotify_data_list) {
3599 r = event_inotify_data_process(e, d);
3600 if (r < 0)
3601 return r;
3602 if (r > 0)
 3603 done++;
3604 }
3605
3606 return done;
3607}
3608
fd38203a 3609static int source_dispatch(sd_event_source *s) {
8f5c235d 3610 EventSourceType saved_type;
c8e9d15c 3611 sd_event *saved_event;
fe8245eb 3612 int r = 0;
3613
3614 assert(s);
6203e07a 3615 assert(s->pending || s->type == SOURCE_EXIT);
fd38203a 3616
3617 /* Save the event source type, here, so that we still know it after the event callback which might
3618 * invalidate the event. */
3619 saved_type = s->type;
3620
de02634c 3621 /* Similarly, store a reference to the event loop object, so that we can still access it after the
b778cba4 3622 * callback might have invalidated/disconnected the event source. */
3623 saved_event = s->event;
3624 PROTECT_EVENT(saved_event);
b778cba4 3625
de02634c 3626 /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
3627 assert(!s->ratelimited);
3628 if (!ratelimit_below(&s->rate_limit)) {
3629 r = event_source_enter_ratelimited(s);
3630 if (r < 0)
3631 return r;
3632
3633 return 1;
3634 }
3635
945c2931 3636 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3637 r = source_set_pending(s, false);
3638 if (r < 0)
3639 return r;
3640 }
fd38203a 3641
3642 if (s->type != SOURCE_POST) {
3643 sd_event_source *z;
6e9feda3 3644
de02634c 3645 /* If we execute a non-post source, let's mark all post sources as pending. */
6e9feda3 3646
90e74a66 3647 SET_FOREACH(z, s->event->post_sources) {
b6d5481b 3648 if (event_source_is_offline(z))
3649 continue;
3650
3651 r = source_set_pending(z, true);
3652 if (r < 0)
3653 return r;
3654 }
3655 }
3656
3657 if (s->enabled == SD_EVENT_ONESHOT) {
3658 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3659 if (r < 0)
3660 return r;
3661 }
3662
12179984 3663 s->dispatching = true;
b7484e2a 3664
3665 switch (s->type) {
3666
3667 case SOURCE_IO:
3668 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3669 break;
3670
6a0f1f6d 3671 case SOURCE_TIME_REALTIME:
a8548816 3672 case SOURCE_TIME_BOOTTIME:
3673 case SOURCE_TIME_MONOTONIC:
3674 case SOURCE_TIME_REALTIME_ALARM:
3675 case SOURCE_TIME_BOOTTIME_ALARM:
3676 r = s->time.callback(s, s->time.next, s->userdata);
3677 break;
3678
3679 case SOURCE_SIGNAL:
3680 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3681 break;
3682
3683 case SOURCE_CHILD: {
3684 bool zombie;
3685
945c2931 3686 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 3687
fd38203a 3688 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3689
3690 /* Now, reap the PID for good. */
f8f3f926 3691 if (zombie) {
cc59d290 3692 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3693 s->child.waited = true;
3694 }
08cd1552 3695
fd38203a 3696 break;
08cd1552 3697 }
3698
3699 case SOURCE_DEFER:
3700 r = s->defer.callback(s, s->userdata);
3701 break;
da7e457c 3702
3703 case SOURCE_POST:
3704 r = s->post.callback(s, s->userdata);
3705 break;
3706
3707 case SOURCE_EXIT:
3708 r = s->exit.callback(s, s->userdata);
da7e457c 3709 break;
9d3e3aa5 3710
3711 case SOURCE_INOTIFY: {
3712 struct sd_event *e = s->event;
3713 struct inotify_data *d;
3714 size_t sz;
3715
3716 assert(s->inotify.inode_data);
3717 assert_se(d = s->inotify.inode_data->inotify_data);
3718
3719 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3720 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3721 assert(d->buffer_filled >= sz);
3722
3723 /* If the inotify callback destroys the event source then this likely means we don't need to
3724 * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
3725 * free it immediately, then we couldn't drop the event from the inotify event queue without
3726 * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
3727 * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
3728 * explicitly GC it after we are done dropping the inotify event from the buffer. */
3729 d->n_busy++;
97ef5391 3730 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
53baf2ef 3731 d->n_busy--;
97ef5391 3732
3733 /* If no event is pending on this inotify object anymore, let's drop the event from the
3734 * inotify event queue buffer. */
3735 if (d->n_pending == 0)
3736 event_inotify_data_drop(e, d, sz);
3737
3738 /* Now we don't want to access 'd' anymore; hence it's OK to GC it now. */
3739 event_gc_inotify_data(e, d);
3740 break;
3741 }
3742
9d3e3aa5 3743 case SOURCE_WATCHDOG:
a71fe8b8 3744 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
9f2a50a3 3745 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
04499a70 3746 assert_not_reached();
3747 }
3748
3749 s->dispatching = false;
3750
3751 if (r < 0) {
3752 log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
3753 strna(s->description),
3754 event_source_type_to_string(saved_type),
3755 s->exit_on_failure ? "exiting" : "disabling");
3756
3757 if (s->exit_on_failure)
3758 (void) sd_event_exit(saved_event, r);
3759 }
3760
3761 if (s->n_ref == 0)
3762 source_free(s);
3763 else if (r < 0)
c3c50474 3764 assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
b7484e2a 3765
6203e07a 3766 return 1;
3767}
3768
3769static int event_prepare(sd_event *e) {
3770 int r;
3771
3772 assert(e);
3773
3774 for (;;) {
3775 sd_event_source *s;
3776
3777 s = prioq_peek(e->prepare);
b6d5481b 3778 if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
3779 break;
3780
3781 s->prepare_iteration = e->iteration;
8656f4a6 3782 prioq_reshuffle(e->prepare, s, &s->prepare_index);
3783
3784 assert(s->prepare);
12179984 3785 s->dispatching = true;
fd38203a 3786 r = s->prepare(s, s->userdata);
3787 s->dispatching = false;
3788
3789 if (r < 0) {
3790 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
3791 strna(s->description),
3792 event_source_type_to_string(s->type),
3793 s->exit_on_failure ? "exiting" : "disabling");
3794
3795 if (s->exit_on_failure)
3796 (void) sd_event_exit(e, r);
3797 }
fd38203a 3798
3799 if (s->n_ref == 0)
3800 source_free(s);
3801 else if (r < 0)
c3c50474 3802 assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
3803 }
3804
3805 return 0;
3806}
3807
6203e07a 3808static int dispatch_exit(sd_event *e) {
3809 sd_event_source *p;
3810 int r;
3811
3812 assert(e);
3813
6203e07a 3814 p = prioq_peek(e->exit);
3815 assert(!p || p->type == SOURCE_EXIT);
3816
b6d5481b 3817 if (!p || event_source_is_offline(p)) {
3818 e->state = SD_EVENT_FINISHED;
3819 return 0;
3820 }
3821
c8e9d15c 3822 PROTECT_EVENT(e);
da7e457c 3823 e->iteration++;
6203e07a 3824 e->state = SD_EVENT_EXITING;
da7e457c 3825 r = source_dispatch(p);
2b0c9ef7 3826 e->state = SD_EVENT_INITIAL;
3827 return r;
3828}
3829
3830static sd_event_source* event_next_pending(sd_event *e) {
3831 sd_event_source *p;
3832
3833 assert(e);
3834
3835 p = prioq_peek(e->pending);
3836 if (!p)
3837 return NULL;
3838
b6d5481b 3839 if (event_source_is_offline(p))
3840 return NULL;
3841
3842 return p;
3843}
3844
3845static int arm_watchdog(sd_event *e) {
3846 struct itimerspec its = {};
3847 usec_t t;
3848
3849 assert(e);
3850 assert(e->watchdog_fd >= 0);
3851
3852 t = sleep_between(e,
3853 usec_add(e->watchdog_last, (e->watchdog_period / 2)),
3854 usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4)));
3855
3856 timespec_store(&its.it_value, t);
3857
3858 /* Make sure we never set the watchdog to 0, which would tell the
3859 * kernel to disable it. */
3860 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3861 its.it_value.tv_nsec = 1;
3862
7c248223 3863 return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL));
3864}
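/* Worked example (annotation, not part of the original source): with a
 * watchdog period of 20s and a last ping at time T, the timer above is armed
 * somewhere in [T+10s, T+15s], i.e. between half and three quarters of the
 * period; sleep_between() picks a point inside that window so the wakeup can
 * be coalesced with other pending timers. */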
3865
3866static int process_watchdog(sd_event *e) {
3867 assert(e);
3868
3869 if (!e->watchdog)
3870 return 0;
3871
3872 /* Don't notify the watchdog too often. */
3873 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3874 return 0;
3875
3876 sd_notify(false, "WATCHDOG=1");
3877 e->watchdog_last = e->timestamp.monotonic;
3878
3879 return arm_watchdog(e);
3880}
3881
3882static void event_close_inode_data_fds(sd_event *e) {
3883 struct inode_data *d;
3884
3885 assert(e);
3886
3887 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3888 * filesystems. But we can't close them right away, as we need them as long as the user still wants to make
365c2885 3889 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3890 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3891 * compromise. */
3892
ed828563 3893 while ((d = e->inode_data_to_close_list)) {
3894 assert(d->fd >= 0);
3895 d->fd = safe_close(d->fd);
3896
ed828563 3897 LIST_REMOVE(to_close, e->inode_data_to_close_list, d);
3898 }
3899}
3900
3901_public_ int sd_event_prepare(sd_event *e) {
3902 int r;
fd38203a 3903
da7e457c 3904 assert_return(e, -EINVAL);
b937d761 3905 assert_return(e = event_resolve(e), -ENOPKG);
3906 assert_return(!event_pid_changed(e), -ECHILD);
3907 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3908 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3909
3910 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3911 * this check here once, since gettid() is typically not cached, and we thus want to minimize
3912 * syscalls. */
3913 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3914
f814c871 3915 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
c8e9d15c 3916 PROTECT_EVENT(e);
f814c871 3917
6203e07a 3918 if (e->exit_requested)
c45a5a74 3919 goto pending;
3920
3921 e->iteration++;
3922
0be6c2f6 3923 e->state = SD_EVENT_PREPARING;
fd38203a 3924 r = event_prepare(e);
0be6c2f6 3925 e->state = SD_EVENT_INITIAL;
fd38203a 3926 if (r < 0)
c45a5a74 3927 return r;
fd38203a 3928
3929 r = event_arm_timer(e, &e->realtime);
3930 if (r < 0)
c45a5a74 3931 return r;
6a0f1f6d 3932
3933 r = event_arm_timer(e, &e->boottime);
3934 if (r < 0)
c45a5a74 3935 return r;
a8548816 3936
3937 r = event_arm_timer(e, &e->monotonic);
3938 if (r < 0)
c45a5a74 3939 return r;
3940
3941 r = event_arm_timer(e, &e->realtime_alarm);
1b5995b0 3942 if (r < 0)
c45a5a74 3943 return r;
fd38203a 3944
6a0f1f6d 3945 r = event_arm_timer(e, &e->boottime_alarm);
1b5995b0 3946 if (r < 0)
c45a5a74 3947 return r;
fd38203a 3948
3949 event_close_inode_data_fds(e);
3950
0601b958 3951 if (event_next_pending(e) || e->need_process_child || e->buffered_inotify_data_list)
3952 goto pending;
3953
2b0c9ef7 3954 e->state = SD_EVENT_ARMED;
3955
3956 return 0;
3957
3958pending:
2b0c9ef7 3959 e->state = SD_EVENT_ARMED;
3960 r = sd_event_wait(e, 0);
3961 if (r == 0)
2b0c9ef7 3962 e->state = SD_EVENT_ARMED;
3963
3964 return r;
3965}
3966
3967static int epoll_wait_usec(
3968 int fd,
3969 struct epoll_event *events,
3970 int maxevents,
3971 usec_t timeout) {
3972
7c248223 3973 int msec;
3974 /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not. */
3975
3976#if HAVE_EPOLL_PWAIT2
39f756d3 3977 static bool epoll_pwait2_absent = false;
52bb308c 3978 int r;
798445ab 3979
3980 /* epoll_pwait2() was added to Linux 5.11 (2021-02-14) and to glibc 2.35 (2022-02-03). In contrast
3981 * to other syscalls we don't bother with our own fallback syscall wrappers on old libcs, since this
3982 * is not that obvious to implement given that the libc and kernel definitions differ in the last
3983 * argument. Moreover, the only reason to use it is the more accurate time-outs (which is not a
3984 * biggie), hence let's rely on glibc's definitions, and fall back to epoll_wait() when they are
3985 * missing. */
3986
3987 if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
3988 r = epoll_pwait2(fd,
3989 events,
3990 maxevents,
52bb308c 3991 TIMESPEC_STORE(timeout),
3992 NULL);
3993 if (r >= 0)
3994 return r;
7cb45dbf 3995 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
3996 return -errno; /* Only fall back to the old epoll_wait() if the syscall is masked or not
3997 * supported. */
3998
3999 epoll_pwait2_absent = true;
4000 }
39f756d3 4001#endif
4002
4003 if (timeout == USEC_INFINITY)
4004 msec = -1;
4005 else {
4006 usec_t k;
4007
4008 k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
4009 if (k >= INT_MAX)
4010 msec = INT_MAX; /* Saturate */
4011 else
4012 msec = (int) k;
4013 }
4014
7c248223 4015 return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
4016}
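/* Worked example (annotation, not part of the original source) for the
 * usec-to-msec conversion above: a timeout of 1500us becomes
 * DIV_ROUND_UP(1500, 1000) = 2ms rather than a truncated 1ms, so the rounded
 * epoll_wait() timeout never fires early; USEC_INFINITY maps to -1 (block
 * forever), and anything beyond INT_MAX milliseconds saturates at INT_MAX. */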
4017
efd3be9d 4018static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
319a4f4b 4019 size_t n_event_queue, m, n_event_max;
4020 int64_t min_priority = threshold;
4021 bool something_new = false;
798445ab 4022 int r;
c45a5a74 4023
4024 assert(e);
4025 assert(ret_min_priority);
6a0f1f6d 4026
8b9708d1 4027 n_event_queue = MAX(e->n_sources, 1u);
319a4f4b 4028 if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
5cddd924 4029 return -ENOMEM;
fd38203a 4030
4031 n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
4032
97ef5391 4033 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
0601b958 4034 if (e->buffered_inotify_data_list)
798445ab 4035 timeout = 0;
97ef5391 4036
8b9708d1 4037 for (;;) {
4038 r = epoll_wait_usec(
4039 e->epoll_fd,
4040 e->event_queue,
4041 n_event_max,
4042 timeout);
798445ab 4043 if (r < 0)
efd3be9d 4044 return r;
c45a5a74 4045
4046 m = (size_t) r;
4047
319a4f4b 4048 if (m < n_event_max)
4049 break;
4050
319a4f4b 4051 if (n_event_max >= n_event_queue * 10)
4052 break;
4053
319a4f4b 4054 if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
4055 return -ENOMEM;
4056
319a4f4b 4057 n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
798445ab 4058 timeout = 0;
da7e457c 4059 }
fd38203a 4060
4061 /* Set the timestamp only the first time this is called. */
4062 if (threshold == INT64_MAX)
4063 triple_timestamp_get(&e->timestamp);
fd38203a 4064
8b9708d1 4065 for (size_t i = 0; i < m; i++) {
fd38203a 4066
4067 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
4068 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
9da4cb2b 4069 else {
5cddd924 4070 WakeupType *t = e->event_queue[i].data.ptr;
4071
4072 switch (*t) {
4073
f8f3f926 4074 case WAKEUP_EVENT_SOURCE: {
5cddd924 4075 sd_event_source *s = e->event_queue[i].data.ptr;
4076
4077 assert(s);
4078
4079 if (s->priority > threshold)
4080 continue;
4081
4082 min_priority = MIN(min_priority, s->priority);
4083
4084 switch (s->type) {
4085
4086 case SOURCE_IO:
5cddd924 4087 r = process_io(e, s, e->event_queue[i].events);
4088 break;
4089
4090 case SOURCE_CHILD:
5cddd924 4091 r = process_pidfd(e, s, e->event_queue[i].events);
4092 break;
4093
4094 default:
04499a70 4095 assert_not_reached();
4096 }
4097
9da4cb2b 4098 break;
f8f3f926 4099 }
fd38203a 4100
9da4cb2b 4101 case WAKEUP_CLOCK_DATA: {
5cddd924 4102 struct clock_data *d = e->event_queue[i].data.ptr;
4103
4104 assert(d);
4105
5cddd924 4106 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
4107 break;
4108 }
4109
4110 case WAKEUP_SIGNAL_DATA:
efd3be9d 4111 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
4112 break;
4113
97ef5391 4114 case WAKEUP_INOTIFY_DATA:
efd3be9d 4115 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
4116 break;
4117
9da4cb2b 4118 default:
04499a70 4119 assert_not_reached();
4120 }
4121 }
4122 if (r < 0)
4123 return r;
4124 if (r > 0)
4125 something_new = true;
4126 }
4127
4128 *ret_min_priority = min_priority;
4129 return something_new;
4130}
4131
4132_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
4133 int r;
4134
4135 assert_return(e, -EINVAL);
4136 assert_return(e = event_resolve(e), -ENOPKG);
4137 assert_return(!event_pid_changed(e), -ECHILD);
4138 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4139 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
4140
4141 if (e->exit_requested) {
4142 e->state = SD_EVENT_PENDING;
4143 return 1;
4144 }
4145
4146 for (int64_t threshold = INT64_MAX; ; threshold--) {
4147 int64_t epoll_min_priority, child_min_priority;
4148
4149 /* It may happen that new epoll (especially IO) and child events are
4150 * triggered just after the process_epoll() call but before process_child(), and the new IO
4151 * events may have a higher priority than the child events. To salvage these events,
4152 * let's call epoll_wait() again, accepting only events with a higher priority than the
4153 * previous ones. See issue https://github.com/systemd/systemd/issues/18190 and comments
4154 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
4155 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */
4156
4157 r = process_epoll(e, timeout, threshold, &epoll_min_priority);
4158 if (r == -EINTR) {
4159 e->state = SD_EVENT_PENDING;
4160 return 1;
4161 }
4162 if (r < 0)
4163 goto finish;
4164 if (r == 0 && threshold < INT64_MAX)
4165 /* No new epoll event. */
4166 break;
4167
4168 r = process_child(e, threshold, &child_min_priority);
fd38203a 4169 if (r < 0)
da7e457c 4170 goto finish;
4171 if (r == 0)
4172 /* No new child event. */
4173 break;
4174
4175 threshold = MIN(epoll_min_priority, child_min_priority);
4176 if (threshold == INT64_MIN)
4177 break;
4178
4179 timeout = 0;
4180 }
4181
4182 r = process_watchdog(e);
4183 if (r < 0)
4184 goto finish;
4185
fd69f224 4186 r = process_inotify(e);
4187 if (r < 0)
4188 goto finish;
4189
fd69f224 4190 r = process_timer(e, e->timestamp.realtime, &e->realtime);
4191 if (r < 0)
4192 goto finish;
4193
fd69f224 4194 r = process_timer(e, e->timestamp.boottime, &e->boottime);
4195 if (r < 0)
4196 goto finish;
4197
4198 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
fd38203a 4199 if (r < 0)
da7e457c 4200 goto finish;
fd38203a 4201
e475d10c 4202 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
fd38203a 4203 if (r < 0)
da7e457c 4204 goto finish;
fd38203a 4205
fd69f224 4206 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
4207 if (r < 0)
4208 goto finish;
4209 else if (r == 1) {
4210 /* The ratelimit expiry callback was called. Let's postpone processing pending sources and
4211 * put the loop back in the initial state, so that sources that were potentially re-enabled
4212 * by the callback are also evaluated in the next iteration.
4213 *
4214 * Wondering why we treat only this invocation of process_timer() differently? Once an event
4215 * source is ratelimited we essentially turn it into a CLOCK_MONOTONIC timer, hence the
4216 * ratelimit expiry callback is never called for any other timer type. */
4217 r = 0;
4218 goto finish;
4219 }
97ef5391 4220
4221 if (event_next_pending(e)) {
4222 e->state = SD_EVENT_PENDING;
c45a5a74 4223 return 1;
4224 }
4225
c45a5a74 4226 r = 0;
fd38203a 4227
da7e457c 4228finish:
2b0c9ef7 4229 e->state = SD_EVENT_INITIAL;
4230
4231 return r;
4232}
4233
4234_public_ int sd_event_dispatch(sd_event *e) {
4235 sd_event_source *p;
4236 int r;
4237
4238 assert_return(e, -EINVAL);
b937d761 4239 assert_return(e = event_resolve(e), -ENOPKG);
4240 assert_return(!event_pid_changed(e), -ECHILD);
4241 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4242 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
4243
4244 if (e->exit_requested)
4245 return dispatch_exit(e);
4246
4247 p = event_next_pending(e);
4248 if (p) {
c8e9d15c 4249 PROTECT_EVENT(e);
4250
4251 e->state = SD_EVENT_RUNNING;
4252 r = source_dispatch(p);
2b0c9ef7 4253 e->state = SD_EVENT_INITIAL;
4254 return r;
4255 }
4256
2b0c9ef7 4257 e->state = SD_EVENT_INITIAL;
4258
4259 return 1;
4260}
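/* Illustrative sketch (annotation, not part of the original source): the
 * three public entry points above are normally driven via sd_event_run()
 * below, which is roughly equivalent to:
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0)                        // nothing pending yet, so wait
 *             r = sd_event_wait(e, timeout);
 *     if (r > 0)                         // something is pending, dispatch it
 *             r = sd_event_dispatch(e);
 */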
4261
34b87517 4262static void event_log_delays(sd_event *e) {
4263 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
4264 size_t l, i;
34b87517 4265
4266 p = b;
4267 l = sizeof(b);
4268 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
4269 l = strpcpyf(&p, l, "%u ", e->delays[i]);
4270 e->delays[i] = 0;
4271 }
442ac269 4272 log_debug("Event loop iterations: %s", b);
4273}
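/* Worked example (annotation, not part of the original source): the delays[]
 * histogram logged above is indexed by the base-2 logarithm of the time
 * between two loop iterations, as computed in sd_event_run() below; e.g. an
 * iteration gap of 300us has log2u64(300) == 8, and thus increments
 * delays[8], the 256..511us bucket. */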
4274
4275_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
4276 int r;
4277
4278 assert_return(e, -EINVAL);
b937d761 4279 assert_return(e = event_resolve(e), -ENOPKG);
4280 assert_return(!event_pid_changed(e), -ECHILD);
4281 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 4282 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
c45a5a74 4283
e6a7bee5 4284 if (e->profile_delays && e->last_run_usec != 0) {
4285 usec_t this_run;
4286 unsigned l;
4287
4288 this_run = now(CLOCK_MONOTONIC);
4289
58c34be8 4290 l = log2u64(this_run - e->last_run_usec);
cb9d621e 4291 assert(l < ELEMENTSOF(e->delays));
4292 e->delays[l]++;
4293
e6a7bee5 4294 if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
34b87517 4295 event_log_delays(e);
e6a7bee5 4296 e->last_log_usec = this_run;
4297 }
4298 }
4299
f814c871 4300 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
c8e9d15c 4301 PROTECT_EVENT(e);
f814c871 4302
c45a5a74 4303 r = sd_event_prepare(e);
4304 if (r == 0)
4305 /* There was nothing? Then wait... */
4306 r = sd_event_wait(e, timeout);
c45a5a74 4307
34b87517 4308 if (e->profile_delays)
e6a7bee5 4309 e->last_run_usec = now(CLOCK_MONOTONIC);
34b87517 4310
02d30981 4311 if (r > 0) {
53bac4e0 4312 /* There's something now, so let's dispatch it. */
4313 r = sd_event_dispatch(e);
4314 if (r < 0)
4315 return r;
4316
4317 return 1;
4318 }
4319
4320 return r;
4321}
4322
f7262a9f 4323_public_ int sd_event_loop(sd_event *e) {
4324 int r;
4325
da7e457c 4326 assert_return(e, -EINVAL);
b937d761 4327 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 4328 assert_return(!event_pid_changed(e), -ECHILD);
2b0c9ef7 4329 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 4330
c8e9d15c 4331 PROTECT_EVENT(e);
fd38203a 4332
da7e457c 4333 while (e->state != SD_EVENT_FINISHED) {
f5fbe71d 4334 r = sd_event_run(e, UINT64_MAX);
fd38203a 4335 if (r < 0)
30dd293c 4336 return r;
4337 }
4338
30dd293c 4339 return e->exit_code;
4340}
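/* Illustrative usage sketch (annotation, not part of the original source): a
 * minimal program that runs the loop until SIGTERM arrives, relying on the
 * default handler installed for a NULL callback, which exits the loop:
 *
 *     _cleanup_(sd_event_unrefp) sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_default(&e);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
 *     if (r < 0)
 *             return r;
 *
 *     return sd_event_loop(e);
 */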
4341
9b364545 4342_public_ int sd_event_get_fd(sd_event *e) {
9b364545 4343 assert_return(e, -EINVAL);
b937d761 4344 assert_return(e = event_resolve(e), -ENOPKG);
4345 assert_return(!event_pid_changed(e), -ECHILD);
4346
4347 return e->epoll_fd;
4348}
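/* Illustrative sketch (annotation, not part of the original source):
 * integrating with a foreign poll()-based loop via the epoll fd returned
 * above; once the fd turns readable, run one non-blocking iteration:
 *
 *     struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };
 *
 *     for (;;) {
 *             if (poll(&p, 1, -1) < 0)
 *                     return -errno;
 *
 *             r = sd_event_run(e, 0);  // timeout 0: dispatch what is ready, don't block
 *             if (r < 0)
 *                     return r;
 *     }
 */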
4349
f7262a9f 4350_public_ int sd_event_get_state(sd_event *e) {
da7e457c 4351 assert_return(e, -EINVAL);
b937d761 4352 assert_return(e = event_resolve(e), -ENOPKG);
4353 assert_return(!event_pid_changed(e), -ECHILD);
4354
4355 return e->state;
4356}
4357
6203e07a 4358_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
da7e457c 4359 assert_return(e, -EINVAL);
b937d761 4360 assert_return(e = event_resolve(e), -ENOPKG);
6203e07a 4361 assert_return(code, -EINVAL);
da7e457c 4362 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 4363
4364 if (!e->exit_requested)
4365 return -ENODATA;
4366
4367 *code = e->exit_code;
4368 return 0;
4369}
4370
6203e07a 4371_public_ int sd_event_exit(sd_event *e, int code) {
da7e457c 4372 assert_return(e, -EINVAL);
b937d761 4373 assert_return(e = event_resolve(e), -ENOPKG);
4374 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4375 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 4376
4377 e->exit_requested = true;
4378 e->exit_code = code;
4379
4380 return 0;
4381}
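/* Illustrative usage sketch (annotation, not part of the original source):
 * requesting a clean shutdown from within a callback; exit sources are still
 * dispatched before sd_event_loop() returns the given code:
 *
 *     static int on_term(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 */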
46e8c825 4382
6a0f1f6d 4383_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
46e8c825 4384 assert_return(e, -EINVAL);
b937d761 4385 assert_return(e = event_resolve(e), -ENOPKG);
46e8c825 4386 assert_return(usec, -EINVAL);
4387 assert_return(!event_pid_changed(e), -ECHILD);
4388
4389 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
4390 return -EOPNOTSUPP;
4391
e475d10c 4392 if (!triple_timestamp_is_set(&e->timestamp)) {
15c689d7 4393 /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
4394 *usec = now(clock);
4395 return 1;
4396 }
46e8c825 4397
e475d10c 4398 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
4399 return 0;
4400}
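/* Illustrative usage sketch (annotation, not part of the original source):
 * inside a callback, prefer the loop's cached timestamp over a fresh clock
 * read; a return value of 1 indicates that the now() fallback above was
 * taken:
 *
 *     uint64_t t;
 *     r = sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &t);
 *     if (r < 0)
 *             return r;
 */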
4401
4402_public_ int sd_event_default(sd_event **ret) {
39883f62 4403 sd_event *e = NULL;
4404 int r;
4405
4406 if (!ret)
4407 return !!default_event;
4408
4409 if (default_event) {
4410 *ret = sd_event_ref(default_event);
4411 return 0;
4412 }
4413
4414 r = sd_event_new(&e);
4415 if (r < 0)
4416 return r;
4417
4418 e->default_event_ptr = &default_event;
4419 e->tid = gettid();
4420 default_event = e;
4421
4422 *ret = e;
4423 return 1;
4424}
4425
4426_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
4427 assert_return(e, -EINVAL);
b937d761 4428 assert_return(e = event_resolve(e), -ENOPKG);
afc6adb5 4429 assert_return(tid, -EINVAL);
76b54375 4430 assert_return(!event_pid_changed(e), -ECHILD);
afc6adb5 4431
4432 if (e->tid != 0) {
4433 *tid = e->tid;
4434 return 0;
4435 }
4436
4437 return -ENXIO;
afc6adb5 4438}
4439
4440_public_ int sd_event_set_watchdog(sd_event *e, int b) {
4441 int r;
4442
4443 assert_return(e, -EINVAL);
b937d761 4444 assert_return(e = event_resolve(e), -ENOPKG);
8f726607 4445 assert_return(!event_pid_changed(e), -ECHILD);
4446
4447 if (e->watchdog == !!b)
4448 return e->watchdog;
4449
4450 if (b) {
4451 r = sd_watchdog_enabled(false, &e->watchdog_period);
4452 if (r <= 0)
cde93897 4453 return r;
4454
4455 /* Issue first ping immediately */
4456 sd_notify(false, "WATCHDOG=1");
4457 e->watchdog_last = now(CLOCK_MONOTONIC);
4458
4459 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
4460 if (e->watchdog_fd < 0)
4461 return -errno;
4462
4463 r = arm_watchdog(e);
4464 if (r < 0)
4465 goto fail;
4466
1eac7948 4467 struct epoll_event ev = {
4468 .events = EPOLLIN,
4469 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
4470 };
cde93897 4471
15c689d7 4472 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
4473 r = -errno;
4474 goto fail;
4475 }
4476
4477 } else {
4478 if (e->watchdog_fd >= 0) {
5a795bff 4479 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
03e334a1 4480 e->watchdog_fd = safe_close(e->watchdog_fd);
4481 }
4482 }
4483
4484 e->watchdog = !!b;
4485 return e->watchdog;
4486
4487fail:
03e334a1 4488 e->watchdog_fd = safe_close(e->watchdog_fd);
4489 return r;
4490}
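/* Illustrative usage sketch (annotation, not part of the original source):
 * under a service unit configured with WatchdogSec=20 the service manager
 * sets $WATCHDOG_USEC=20000000, and hooking the loop up to it is just:
 *
 *     r = sd_event_set_watchdog(e, true);
 *     if (r < 0)
 *             return r;
 *     // r == 0: no watchdog requested by the manager; r > 0: enabled
 */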
4491
4492_public_ int sd_event_get_watchdog(sd_event *e) {
4493 assert_return(e, -EINVAL);
b937d761 4494 assert_return(e = event_resolve(e), -ENOPKG);
4495 assert_return(!event_pid_changed(e), -ECHILD);
4496
4497 return e->watchdog;
4498}
4499
4500_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
4501 assert_return(e, -EINVAL);
b937d761 4502 assert_return(e = event_resolve(e), -ENOPKG);
4503 assert_return(!event_pid_changed(e), -ECHILD);
4504
4505 *ret = e->iteration;
4506 return 0;
4507}
4508
4509_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
4510 assert_return(s, -EINVAL);
4511
4512 s->destroy_callback = callback;
4513 return 0;
4514}
4515
4516_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
4517 assert_return(s, -EINVAL);
4518
4519 if (ret)
4520 *ret = s->destroy_callback;
4521
4522 return !!s->destroy_callback;
4523}
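/* Illustrative usage sketch (annotation, not part of the original source):
 * tie heap-allocated userdata to the lifetime of the source, so that it is
 * freed automatically whenever the source is destroyed:
 *
 *     r = sd_event_source_set_destroy_callback(s, free);
 *     if (r < 0)
 *             return r;
 */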
4524
4525_public_ int sd_event_source_get_floating(sd_event_source *s) {
4526 assert_return(s, -EINVAL);
4527
4528 return s->floating;
4529}
4530
4531_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
4532 assert_return(s, -EINVAL);
4533
4534 if (s->floating == !!b)
4535 return 0;
4536
4537 if (!s->event) /* Already disconnected */
4538 return -ESTALE;
4539
4540 s->floating = b;
4541
4542 if (b) {
4543 sd_event_source_ref(s);
4544 sd_event_unref(s->event);
4545 } else {
4546 sd_event_ref(s->event);
4547 sd_event_source_unref(s);
4548 }
4549
4550 return 1;
4551}
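/* Illustrative usage sketch (annotation, not part of the original source):
 * "fire and forget": hand ownership of a source over to the event loop and
 * drop our own reference; the loop then keeps the source alive until the
 * loop itself is freed:
 *
 *     r = sd_event_source_set_floating(s, true);
 *     if (r < 0)
 *             return r;
 *     s = sd_event_source_unref(s);
 */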
4552
4553_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
4554 assert_return(s, -EINVAL);
4555 assert_return(s->type != SOURCE_EXIT, -EDOM);
4556
4557 return s->exit_on_failure;
4558}
4559
4560_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
4561 assert_return(s, -EINVAL);
4562 assert_return(s->type != SOURCE_EXIT, -EDOM);
4563
4564 if (s->exit_on_failure == !!b)
4565 return 0;
4566
4567 s->exit_on_failure = b;
4568 return 1;
4569}
4570
4571_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
4572 int r;
4573
4574 assert_return(s, -EINVAL);
4575
4576 /* Turning on ratelimiting on event source types that don't support it is a loggable offense. Doing
4577 * so is a programming error. */
4578 assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);
4579
4580 /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
4581 * non-ratelimited. */
fd69f224 4582 r = event_source_leave_ratelimit(s, /* run_callback */ false);
4583 if (r < 0)
4584 return r;
4585
4586 s->rate_limit = (RateLimit) { interval, burst };
4587 return 0;
4588}
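/* Illustrative usage sketch (annotation, not part of the original source):
 * allow at most 10 dispatches of a source per second; once above that the
 * source is taken offline until the interval passes:
 *
 *     r = sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 10);
 *     if (r < 0)
 *             return r;
 */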
4589
4590_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
4591 assert_return(s, -EINVAL);
4592
4593 s->ratelimit_expire_callback = callback;
4594 return 0;
4595}
4596
4597_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
4598 assert_return(s, -EINVAL);
4599
4600 /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
4601 * don't use assert_return(). Unlike turning on ratelimiting it's not really a programming error. */
4602 if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
4603 return -EDOM;
4604
4605 if (!ratelimit_configured(&s->rate_limit))
4606 return -ENOEXEC;
4607
4608 if (ret_interval)
4609 *ret_interval = s->rate_limit.interval;
4610 if (ret_burst)
4611 *ret_burst = s->rate_limit.burst;
4612
4613 return 0;
4614}
4615
4616_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
4617 assert_return(s, -EINVAL);
4618
4619 if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
4620 return false;
4621
4622 if (!ratelimit_configured(&s->rate_limit))
4623 return false;
4624
4625 return s->ratelimited;
4626}
4627
4628_public_ int sd_event_set_signal_exit(sd_event *e, int b) {
4629 bool change = false;
4630 int r;
4631
4632 assert_return(e, -EINVAL);
4633
4634 if (b) {
4635 /* We want to maintain pointers to these event sources, so that we can destroy them when told
4636 * to. But we also don't want them to pin the event loop itself. Hence we mark them as
4637 * floating after creation (and undo this before deleting them again). */
4638
4639 if (!e->sigint_event_source) {
4640 r = sd_event_add_signal(e, &e->sigint_event_source, SIGINT | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
4641 if (r < 0)
4642 return r;
4643
4644 assert(sd_event_source_set_floating(e->sigint_event_source, true) >= 0);
4645 change = true;
4646 }
4647
4648 if (!e->sigterm_event_source) {
4649 r = sd_event_add_signal(e, &e->sigterm_event_source, SIGTERM | SD_EVENT_SIGNAL_PROCMASK, NULL, NULL);
4650 if (r < 0) {
4651 if (change) {
4652 assert(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
4653 e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
4654 }
4655
4656 return r;
4657 }
4658
4659 assert(sd_event_source_set_floating(e->sigterm_event_source, true) >= 0);
4660 change = true;
4661 }
4662
4663 } else {
4664 if (e->sigint_event_source) {
4665 assert(sd_event_source_set_floating(e->sigint_event_source, false) >= 0);
4666 e->sigint_event_source = sd_event_source_unref(e->sigint_event_source);
4667 change = true;
4668 }
4669
4670 if (e->sigterm_event_source) {
4671 assert(sd_event_source_set_floating(e->sigterm_event_source, false) >= 0);
4672 e->sigterm_event_source = sd_event_source_unref(e->sigterm_event_source);
4673 change = true;
4674 }
4675 }
4676
4677 return change;
4678}
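/* Illustrative usage sketch (annotation, not part of the original source):
 * typically called right after acquiring the loop, to make SIGINT/SIGTERM
 * terminate it cleanly:
 *
 *     r = sd_event_set_signal_exit(e, true);
 *     if (r < 0)
 *             return r;
 */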