/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static bool event_source_is_online(sd_event_source *s) {
        assert(s);
        return s->enabled != SD_EVENT_OFF && !s->ratelimited;
}

static bool event_source_is_offline(sd_event_source *s) {
        assert(s);
        return s->enabled == SD_EVENT_OFF || s->ratelimited;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t)                 \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM)

#define EVENT_SOURCE_CAN_RATE_LIMIT(t)          \
        IN_SET((t),                             \
               SOURCE_IO,                       \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM,      \
               SOURCE_SIGNAL,                   \
               SOURCE_DEFER,                    \
               SOURCE_INOTIFY)
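
/* Usage sketch (illustrative comment, not compiled here): event source types accepted by
 * EVENT_SOURCE_CAN_RATE_LIMIT() can be throttled via the public API, e.g. at most 10
 * dispatches per 5s interval:
 *
 *     r = sd_event_source_set_ratelimit(source, 5 * USEC_PER_SEC, 10);
 */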

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_online_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run_usec, last_log_usec;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static usec_t time_event_source_next(const sd_event_source *s) {
        assert(s);

        /* We have two kinds of event sources that have elapsation times associated with them: the actual
         * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
         * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are
         * looking at here. */

        if (s->ratelimited) { /* If rate-limited the next elapsation is when the ratelimit time window ends */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Otherwise this must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return s->time.next;

        return USEC_INFINITY;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        assert(s);

        if (s->ratelimited) { /* For ratelimited stuff the earliest and the latest time shall actually be the
                               * same, as we should avoid adding additional inaccuracy on an inaccuracy time
                               * window */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return usec_add(s->time.next, s->time.accuracy);

        return USEC_INFINITY;
}

static bool event_source_timer_candidate(const sd_event_source *s) {
        assert(s);

        /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending)
         * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */
        return !s->pending || s->ratelimited;
}

static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
        if (event_source_timer_candidate(x) && !event_source_timer_candidate(y))
                return -1;
        if (!event_source_timer_candidate(x) && event_source_timer_candidate(y))
                return 1;

        /* Order by time */
        return CMP(time_func(x), time_func(y));
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_next);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_latest);
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 … 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
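
/* Usage sketch (illustrative comment, not compiled here): how a client typically obtains
 * and releases a loop with the functions above. The error handling shown is assumed
 * client-side convention, not something this file mandates.
 *
 *     sd_event *e = NULL;
 *     int r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;           // e.g. -ENOMEM if allocation failed
 *     ...
 *     e = sd_event_unref(e);      // drops the reference taken by sd_event_new()
 */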

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty
         * that way, the object is removed altogether. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_online_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            event_source_is_online(e->signal_sources[sig]))
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state. Makes sure the two prioq's are ordered properly again. */

        if (s->ratelimited)
                d = &s->event->monotonic;
        else {
                assert(EVENT_SOURCE_IS_TIME(s->type));
                assert_se(d = event_get_clock_data(s->event, s->type));
        }

        prioq_reshuffle(d->earliest, s, &s->earliest_index);
        prioq_reshuffle(d->latest, s, &s->latest_index);
        d->needs_rearm = true;
}

static void event_source_time_prioq_remove(
                sd_event_source *s,
                struct clock_data *d) {

        assert(s);
        assert(d);

        prioq_remove(d->earliest, s, &s->earliest_index);
        prioq_remove(d->latest, s, &s->latest_index);
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                /* Only remove this event source from the time event source here if it is not ratelimited. If
                 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
                 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */

                if (!s->ratelimited) {
                        struct clock_data *d;
                        assert_se(d = event_get_clock_data(s->event, s->type));
                        event_source_time_prioq_remove(s, d);
                }

                break;

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (event_source_is_online(s)) {
                                assert(s->event->n_online_child_sources > 0);
                                s->event->n_online_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        if (s->ratelimited)
                event_source_time_prioq_remove(s, &s->event->monotonic);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static sd_event_source* source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        return mfree(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 1;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
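
/* Usage sketch (illustrative comment): watching a socket fd for readability. The handler
 * name and fd variable are hypothetical client code, not part of this file.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[4096];
 *             ssize_t n = read(fd, buf, sizeof(buf));  // revents has EPOLLIN set here
 *             return n < 0 ? -errno : 0;               // a negative return aborts the loop by default
 *     }
 *
 *     r = sd_event_add_io(e, &source, sock_fd, EPOLLIN, on_io, NULL);
 */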

static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        assert(d);

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}

static int event_source_time_prioq_put(
                sd_event_source *s,
                struct clock_data *d) {

        int r;

        assert(s);
        assert(d);

        r = prioq_put(d->earliest, s, &s->earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->latest_index);
        if (r < 0) {
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
                s->earliest_index = PRIOQ_IDX_NULL;
                return r;
        }

        d->needs_rearm = true;
        return 0;
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != UINT64_MAX, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        assert_se(d = event_get_clock_data(e, type));

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = event_source_time_prioq_put(s, d);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
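
/* Usage sketch (illustrative comment): a one-shot timer firing roughly 5s from now on the
 * monotonic clock, with 100ms accuracy. "on_timer" is a hypothetical client handler.
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r >= 0)
 *             r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                                   now_usec + 5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                                   on_timer, NULL);
 */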

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}
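
/* Usage sketch (illustrative comment): the relative variant spares the caller fetching the
 * current time; this is equivalent to the absolute example above ("on_timer" is hypothetical).
 *
 *     r = sd_event_add_time_relative(e, &source, CLOCK_MONOTONIC,
 *                                    5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                                    on_timer, NULL);
 */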

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
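
/* Usage sketch (illustrative comment): the caller must block the signal first, otherwise the
 * signal_is_blocked() check above returns -EBUSY. Passing a NULL callback installs
 * signal_exit_callback(), i.e. SIGTERM then terminates the loop.
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 */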

static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;

        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}
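
/* Usage sketch (illustrative comment): watching a fork()ed child for exit. SIGCHLD must be
 * blocked first (see the check above); "on_child" is a hypothetical client handler that
 * receives the siginfo_t filled in by waitid().
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGCHLD);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *     r = sd_event_add_child(e, &source, child_pid, WEXITED, on_child, NULL);
 */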

_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}
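
/* Usage sketch (illustrative comment): same as sd_event_add_child(), but the caller already
 * holds a pidfd; the fd stays owned by the caller unless the owner flag is changed later.
 *
 *     int pfd = pidfd_open(child_pid, 0);   // pidfd_open() needs Linux 5.3+
 *     if (pfd >= 0)
 *             r = sd_event_add_child_pidfd(e, &source, pfd, WEXITED, on_child, NULL);
 */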

static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
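
/* Usage sketch (illustrative comment): defer sources are created one-shot and already marked
 * pending above, so the callback fires once on the next loop iteration; handy for
 * "run this soon, but not right now" work ("on_deferred_work"/"ctx" are hypothetical).
 *
 *     r = sd_event_add_defer(e, NULL, on_deferred_work, ctx);
 */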

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
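
/* Usage note (comment, based on the documented semantics of sd_event_add_post()): post
 * sources are dispatched at the end of any iteration in which at least one non-post source
 * ran, which makes them useful for "after everything else settled" bookkeeping.
 *
 *     r = sd_event_add_post(e, NULL, on_post, NULL);   // hypothetical handler
 */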

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
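
/* Usage sketch (illustrative comment): exit sources act as cleanup handlers once
 * sd_event_exit() has been called, dispatched in the priority order established by
 * exit_prioq_compare() above ("on_exit_cleanup" is a hypothetical handler).
 *
 *     static int on_exit_cleanup(sd_event_source *s, void *userdata) {
 *             // release resources before sd_event_loop() returns
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, NULL, on_exit_cleanup, NULL);
 */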

static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
         * events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
1919
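/* A sketch of what this coalescing means for callers (illustrative, not part of
 * the original file): two sources watching the same inode share one kernel watch
 * whose mask is the OR of both, and IN_EXCL_UNLINK survives only if every source
 * requests it. on_inotify() is a hypothetical handler. */
#if 0
static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        return 0;
}

static int watch_same_inode_twice(sd_event *e) {
        sd_event_source *a = NULL, *b = NULL;
        int r;

        r = sd_event_add_inotify(e, &a, "/run/example", IN_CREATE|IN_EXCL_UNLINK, on_inotify, NULL);
        if (r < 0)
                return r;

        r = sd_event_add_inotify(e, &b, "/run/example", IN_DELETE, on_inotify, NULL);
        if (r < 0) {
                sd_event_source_unref(a);
                return r;
        }

        /* The kernel now sees a single watch with mask IN_CREATE|IN_DELETE;
         * IN_EXCL_UNLINK was ANDed away since source b did not request it. */
        return 0;
}
#endif
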
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd; /* inotify_add_watch_fd() returns a negative errno itself */

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 1;
}

static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = inotify_exit_callback;

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority
         * of the event source until then; for that we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

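/* Minimal usage sketch for the call above (illustrative, not part of the original
 * file); the handler name and the watched path are arbitrary. */
#if 0
static int on_dir_event(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        if (ev->len > 0)
                log_info("inotify event %#x on %s", ev->mask, ev->name);
        return 0;
}

static int run_inotify_loop(void) {
        _cleanup_(sd_event_unrefp) sd_event *e = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        /* Passing NULL for ret makes this a "floating" source owned by the loop. */
        r = sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE|IN_DELETE|IN_MOVE, on_dir_event, NULL);
        if (r < 0)
                return r;

        return sd_event_loop(e);
}
#endif
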
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (event_source_is_offline(s)) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}

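/* Sketch (not part of the original file): swapping the fd under a live I/O source,
 * e.g. after reconnecting a socket. On failure the old fd remains registered. */
#if 0
static int swap_connection_fd(sd_event_source *io_source, int *current_fd, int new_fd) {
        int r;

        r = sd_event_source_set_io_fd(io_source, new_fd);
        if (r < 0)
                return r;           /* *current_fd is still being watched */

        safe_close(*current_fd);    /* we own the old fd unless set_io_fd_own() was used */
        *current_fd = new_fd;
        return 0;
}
#endif
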
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (event_source_is_online(s)) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}

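/* Sketch (not part of the original file): the common pattern of watching EPOLLOUT
 * only while output is queued, so an always-writable socket doesn't wake the loop
 * on every iteration. */
#if 0
static int update_io_mask(sd_event_source *io_source, bool have_output) {
        return sd_event_source_set_io_events(io_source,
                                             EPOLLIN | (have_output ? EPOLLOUT : 0));
}
#endif
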
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        event_source_pp_prioq_reshuffle(s);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}

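/* Sketch (not part of the original file): smaller values dispatch first, and
 * SD_EVENT_PRIORITY_NORMAL (0) is the default. Here a shutdown signal source is
 * made to win over ordinary sources pending in the same iteration. */
#if 0
static int prioritize_shutdown(sd_event_source *sigterm_source) {
        return sd_event_source_set_priority(sigterm_source, SD_EVENT_PRIORITY_IMPORTANT);
}
#endif
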
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (ret)
                *ret = s->enabled;

        return s->enabled != SD_EVENT_OFF;
}

static int event_source_offline(
                sd_event_source *s,
                int enabled,
                bool ratelimited) {

        bool was_offline;
        int r;

        assert(s);
        assert(enabled == SD_EVENT_OFF || ratelimited);

        /* Unset the pending flag when this event source is disabled */
        if (s->enabled != SD_EVENT_OFF &&
            enabled == SD_EVENT_OFF &&
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        was_offline = event_source_is_offline(s);
        s->enabled = enabled;
        s->ratelimited = ratelimited;

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                event_source_time_prioq_reshuffle(s);
                break;

        case SOURCE_SIGNAL:
                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                break;

        case SOURCE_CHILD:
                if (!was_offline) {
                        assert(s->event->n_online_child_sources > 0);
                        s->event->n_online_child_sources--;
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        return 1;
}

static int event_source_online(
                sd_event_source *s,
                int enabled,
                bool ratelimited) {

        bool was_online;
        int r;

        assert(s);
        assert(enabled != SD_EVENT_OFF || !ratelimited);

        /* Unset the pending flag when this event source is enabled */
        if (s->enabled == SD_EVENT_OFF &&
            enabled != SD_EVENT_OFF &&
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        /* Are we really ready for onlining? */
        if (enabled == SD_EVENT_OFF || ratelimited) {
                /* Nope, we are not ready for onlining, so just update the precise state and exit */
                s->enabled = enabled;
                s->ratelimited = ratelimited;
                return 0;
        }

        was_online = event_source_is_online(s);

        switch (s->type) {
        case SOURCE_IO:
                r = source_io_register(s, enabled, s->io.events);
                if (r < 0)
                        return r;
                break;

        case SOURCE_SIGNAL:
                r = event_make_signal_data(s->event, s->signal.sig, NULL);
                if (r < 0) {
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        return r;
                }

                break;

        case SOURCE_CHILD:
                if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                        /* yes, we have a pidfd */

                        r = source_child_pidfd_register(s, enabled);
                        if (r < 0)
                                return r;
                } else {
                        /* no pidfd, or something else to watch for than WEXITED */

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                }

                if (!was_online)
                        s->event->n_online_child_sources++;
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
        case SOURCE_EXIT:
        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->enabled = enabled;
        s->ratelimited = ratelimited;

        /* Non-failing operations below */
        switch (s->type) {
        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                event_source_time_prioq_reshuffle(s);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                break;
        }

        return 1;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m) /* No change? */
                return 0;

        if (m == SD_EVENT_OFF)
                r = event_source_offline(s, m, s->ratelimited);
        else {
                if (s->enabled != SD_EVENT_OFF) {
                        /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
                         * event source is already enabled after all. */
                        s->enabled = m;
                        return 0;
                }

                r = event_source_online(s, m, s->ratelimited);
        }
        if (r < 0)
                return r;

        event_source_pp_prioq_reshuffle(s);
        return 0;
}

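/* Sketch (not part of the original file): a SD_EVENT_ONESHOT source is switched to
 * SD_EVENT_OFF just before its callback runs, so a handler that wants another shot
 * simply re-enables itself. */
#if 0
static int on_oneshot(sd_event_source *s, void *userdata) {
        /* ... handle one unit of work ... */
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT); /* re-arm */
}
#endif
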
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
        usec_t t;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);

        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
        if (r < 0)
                return r;

        usec = usec_add(t, usec);
        if (usec == USEC_INFINITY)
                return -EOVERFLOW;

        return sd_event_source_set_time(s, usec);
}

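/* Sketch (not part of the original file): a simple periodic timer built on the
 * relative setter above, re-armed from its own callback every five seconds. */
#if 0
static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
        int r;

        r = sd_event_source_set_time_relative(s, 5 * USEC_PER_SEC);
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}
#endif
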
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != UINT64_MAX, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd;
}

_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);

        /* If we have already seen indication that the process exited, refuse to send a signal. This way we
         * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
         * available. */
        if (s->child.exited)
                return -ESRCH;

        if (s->child.pidfd >= 0) {
                siginfo_t copy;

                /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
                 * structure here */
                if (si)
                        copy = *si;

                if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
                        /* Let's propagate the error only if the system call is not implemented or prohibited */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        return 0;
        }

        /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
         * them here. */
        if (flags != 0)
                return -EOPNOTSUPP;

        if (si) {
                /* We use rt_sigqueueinfo() only if a siginfo_t is specified. */
                siginfo_t copy = *si;

                if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
                        return -errno;
        } else if (kill(s->child.pid, sig) < 0)
                return -errno;

        return 0;
}

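/* Sketch (not part of the original file): asking a watched child to terminate.
 * With a pidfd the signal can never hit a recycled PID; without one, the
 * s->child.exited check above is the (weaker) guard. */
#if 0
static int stop_child(sd_event_source *child_source) {
        int r;

        r = sd_event_source_send_child_signal(child_source, SIGTERM, NULL, 0);
        if (r == -ESRCH)
                return 0; /* already exited, nothing left to signal */
        return r;
}
#endif
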
_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd_owned;
}

_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        s->child.pidfd_owned = own;
        return 0;
}

_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        return s->child.process_owned;
}

_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        s->child.process_owned = own;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

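/* Sketch (not part of the original file), modelled on how connection libraries
 * typically use prepare callbacks: just before the loop polls, adjust the watched
 * events to whatever the connection currently needs. The Connection type and the
 * conn_has_pending_output() helper are hypothetical. */
#if 0
static int prepare_conn(sd_event_source *s, void *userdata) {
        Connection *c = userdata;        /* hypothetical connection object */
        uint32_t events = EPOLLIN;

        if (conn_has_pending_output(c))  /* hypothetical helper */
                events |= EPOLLOUT;

        return sd_event_source_set_io_events(s, events);
}
#endif
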
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}

static int event_source_enter_ratelimited(sd_event_source *s) {
        int r;

        assert(s);

        /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
         * the end of the rate limit time window, much as if it was a timer event source. */

        if (s->ratelimited)
                return 0; /* Already ratelimited, this is a NOP hence */

        /* Make sure we can install a CLOCK_MONOTONIC event further down. */
        r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
        if (r < 0)
                return r;

        /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
         * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
         * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));

        /* Now, let's add the event source to the monotonic clock instead */
        r = event_source_time_prioq_put(s, &s->event->monotonic);
        if (r < 0)
                goto fail;

        /* And let's take the event source officially offline */
        r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
        if (r < 0) {
                event_source_time_prioq_remove(s, &s->event->monotonic);
                goto fail;
        }

        event_source_pp_prioq_reshuffle(s);

        log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
        return 0;

fail:
        /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
         * space for it should already be allocated. */
        if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);

        return r;
}

static int event_source_leave_ratelimit(sd_event_source *s) {
        int r;

        assert(s);

        if (!s->ratelimited)
                return 0;

        /* Let's take the event source out of the monotonic prioq first. */
        event_source_time_prioq_remove(s, &s->event->monotonic);

        /* Let's then add the event source to its native clock prioq again, if this is a timer event source */
        if (EVENT_SOURCE_IS_TIME(s->type)) {
                r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
                if (r < 0)
                        goto fail;
        }

        /* Let's try to take it online again. */
        r = event_source_online(s, s->enabled, /* ratelimited= */ false);
        if (r < 0) {
                /* Do something roughly sensible when this failed: undo the two prioq ops above */
                if (EVENT_SOURCE_IS_TIME(s->type))
                        event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));

                goto fail;
        }

        event_source_pp_prioq_reshuffle(s);
        ratelimit_reset(&s->rate_limit);

        log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));
        return 0;

fail:
        /* Do something somewhat reasonable when we cannot move an event source out of ratelimited mode:
         * simply put it back in it, maybe we can then process it more successfully next iteration. */
        assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);

        return r;
}

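/* Sketch (not part of the original file): the public knob that drives the
 * machinery above. This limits a source to 100 dispatches per second; once the
 * burst is used up the source is taken offline until the interval ends, after
 * which event_source_leave_ratelimit() brings it back online. */
#if 0
static int limit_dispatch_rate(sd_event_source *s) {
        return sd_event_source_set_ratelimit(s, USEC_PER_SEC, 100);
}
#endif
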
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}

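/* Worked example for the above (illustrative numbers, not part of the original
 * file): with a = 13:37:04.200000, b = 13:37:09.000000 and a per-boot perturbation
 * of 0.5s, the minute-granularity candidate 13:37:00.500000 falls before a and is
 * rejected, as is the 10s-granularity candidate; the 1s-granularity candidate
 * 13:37:09.500000 is >= b, so one second is subtracted, yielding 13:37:08.500000,
 * which lies in [a, b] and is returned. Every loop on this machine whose window
 * covers that point wakes at the same half-second. */
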
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        d->next = t;
        return 0;
}

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s || time_event_source_next(s) > n)
                        break;

                if (s->ratelimited) {
                        /* This is an event source whose ratelimit window has ended. Let's turn it on
                         * again. */
                        assert(s->ratelimited);

                        r = event_source_leave_ratelimit(s);
                        if (r < 0)
                                return r;

                        continue;
                }

                if (s->enabled == SD_EVENT_OFF || s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                event_source_time_prioq_reshuffle(s);
        }

        return 0;
}

static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
        int64_t min_priority = threshold;
        bool something_new = false;
        sd_event_source *s;
        int r;

        assert(e);
        assert(ret_min_priority);

        if (!e->need_process_child) {
                *ret_min_priority = min_priority;
                return 0;
        }

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources) {
                assert(s->type == SOURCE_CHILD);

                if (s->priority > threshold)
                        continue;

                if (s->pending)
                        continue;

                if (event_source_is_offline(s))
                        continue;

                if (s->child.exited)
                        continue;

                if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
                        continue;

                zero(s->child.siginfo);
                if (waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
                        return negative_errno();

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (zombie)
                                s->child.exited = true;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                        if (r > 0) {
                                something_new = true;
                                min_priority = MIN(min_priority, s->priority);
                        }
                }
        }

        *ret_min_priority = min_priority;
        return something_new;
}

static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (s->pending)
                return 0;

        if (event_source_is_offline(s))
                return 0;

        if (!EVENT_SOURCE_WATCH_PIDFD(s))
                return 0;

        zero(s->child.siginfo);
        if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
                return -errno;

        if (s->child.siginfo.si_pid == 0)
                return 0;

        if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
                s->child.exited = true;

        return source_set_pending(s, true);
}

static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);
        assert(min_priority);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority; if we dequeue one and
           SIGCHLD is enqueued later we wouldn't know, but we
           might have higher priority children we care about, hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return 0;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
                if (r > 0 && *min_priority >= s->priority) {
                        *min_priority = s->priority;
                        return 1; /* an event source with smaller priority is queued. */
                }

                return 0;
        }
}

static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        if (d->priority > threshold)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}

static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}

static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (event_source_is_offline(s))
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (event_source_is_offline(s))
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;
        }

        return 0;
}

static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

static int source_dispatch(sd_event_source *s) {
        _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        /* Similarly, store a reference to the event loop object, so that we can still access it after the
         * callback might have invalidated/disconnected the event source. */
        saved_event = sd_event_ref(s->event);

        /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
        assert(!s->ratelimited);
        if (!ratelimit_below(&s->rate_limit)) {
                r = event_source_enter_ratelimited(s);
                if (r < 0)
                        return r;

                return 1;
        }

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;

                /* If we execute a non-post source, let's mark all post sources as pending. */

                SET_FOREACH(z, s->event->post_sources) {
                        if (event_source_is_offline(z))
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || event_source_is_offline(p)) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (event_source_is_offline(p))
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        if (timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        return 0;
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
         * a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

c45a5a74 3734_public_ int sd_event_prepare(sd_event *e) {
3735 int r;
fd38203a 3736
da7e457c 3737 assert_return(e, -EINVAL);
b937d761 3738 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 3739 assert_return(!event_pid_changed(e), -ECHILD);
3740 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3741 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3742
e5446015 3743 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3744 * this check here once, since gettid() is typically not cached, and we thus want to minimize
3745 * syscalls */
3746 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3747
f814c871 3748 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
3749 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
3750
6203e07a 3751 if (e->exit_requested)
c45a5a74 3752 goto pending;
fd38203a 3753
3754 e->iteration++;
3755
0be6c2f6 3756 e->state = SD_EVENT_PREPARING;
fd38203a 3757 r = event_prepare(e);
0be6c2f6 3758 e->state = SD_EVENT_INITIAL;
fd38203a 3759 if (r < 0)
c45a5a74 3760 return r;
fd38203a 3761
6a0f1f6d 3762 r = event_arm_timer(e, &e->realtime);
3763 if (r < 0)
c45a5a74 3764 return r;
6a0f1f6d 3765
a8548816 3766 r = event_arm_timer(e, &e->boottime);
3767 if (r < 0)
c45a5a74 3768 return r;
a8548816 3769
6a0f1f6d 3770 r = event_arm_timer(e, &e->monotonic);
3771 if (r < 0)
c45a5a74 3772 return r;
6a0f1f6d 3773
3774 r = event_arm_timer(e, &e->realtime_alarm);
1b5995b0 3775 if (r < 0)
c45a5a74 3776 return r;
fd38203a 3777
6a0f1f6d 3778 r = event_arm_timer(e, &e->boottime_alarm);
1b5995b0 3779 if (r < 0)
c45a5a74 3780 return r;
fd38203a 3781
97ef5391 3782 event_close_inode_data_fds(e);
3783
1b5995b0 3784 if (event_next_pending(e) || e->need_process_child)
c45a5a74 3785 goto pending;
3786
2b0c9ef7 3787 e->state = SD_EVENT_ARMED;
c45a5a74 3788
3789 return 0;
3790
3791pending:
2b0c9ef7 3792 e->state = SD_EVENT_ARMED;
6d148a84 3793 r = sd_event_wait(e, 0);
3794 if (r == 0)
2b0c9ef7 3795 e->state = SD_EVENT_ARMED;
6d148a84 3796
3797 return r;
c45a5a74 3798}
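The three public calls sd_event_prepare(), sd_event_wait() and sd_event_dispatch() can also be driven by hand instead of via sd_event_run(); a minimal sketch of one iteration, with error handling abbreviated:

#include <stdint.h>
#include <systemd/sd-event.h>

/* One hand-rolled iteration: INITIAL -> ARMED -> PENDING -> back to INITIAL. */
static int run_one_iteration(sd_event *e) {
        int r;

        r = sd_event_prepare(e);        /* runs prepare callbacks, arms timers */
        if (r < 0)
                return r;
        if (r == 0) {                   /* nothing pending yet, so poll */
                r = sd_event_wait(e, UINT64_MAX);
                if (r <= 0)
                        return r;       /* error, or nothing became pending */
        }

        return sd_event_dispatch(e);    /* dispatch a single event source */
}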
3799
798445ab 3800static int epoll_wait_usec(
3801 int fd,
3802 struct epoll_event *events,
3803 int maxevents,
3804 usec_t timeout) {
3805
798445ab 3806 int r, msec;
39f756d3 3807#if 0
3808 static bool epoll_pwait2_absent = false;
798445ab 3809
39f756d3 3810 /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not.
3811 *
3812 * FIXME: this is temporarily disabled until epoll_pwait2() becomes more widely available.
3813 * See https://github.com/systemd/systemd/pull/18973 and
3814 * https://github.com/systemd/systemd/issues/19052. */
798445ab 3815
3816 if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
3817 struct timespec ts;
3818
3819 r = epoll_pwait2(fd,
3820 events,
3821 maxevents,
3822 timespec_store(&ts, timeout),
3823 NULL);
3824 if (r >= 0)
3825 return r;
7cb45dbf 3826 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
798445ab 3827 return -errno; /* Only fall back to the old epoll_wait() if the syscall is masked or not
3828 * supported. */
3829
3830 epoll_pwait2_absent = true;
3831 }
39f756d3 3832#endif
798445ab 3833
3834 if (timeout == USEC_INFINITY)
3835 msec = -1;
3836 else {
3837 usec_t k;
3838
3839 k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
3840 if (k >= INT_MAX)
3841 msec = INT_MAX; /* Saturate */
3842 else
3843 msec = (int) k;
3844 }
3845
3846 r = epoll_wait(fd,
3847 events,
3848 maxevents,
3849 msec);
3850 if (r < 0)
3851 return -errno;
3852
3853 return r;
3854}
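The fallback path above converts microseconds to milliseconds with ceiling division, so a timeout is never shortened by rounding (rounding down could wake the loop just before a timer's deadline and force a useless extra iteration). The same conversion in isolation; DIV_ROUND_UP is redefined here naively, while the real macro also guards against overflow:

#include <limits.h>
#include <stdint.h>

typedef uint64_t usec_t;
#define USEC_PER_MSEC UINT64_C(1000)
#define USEC_INFINITY UINT64_MAX
#define DIV_ROUND_UP(x, y) (((x) + (y) - 1) / (y))    /* naive ceiling division */

static int timeout_to_msec(usec_t timeout) {
        if (timeout == USEC_INFINITY)
                return -1;                            /* tells epoll_wait() to block forever */

        uint64_t k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
        return k >= INT_MAX ? INT_MAX : (int) k;      /* saturate instead of overflowing */
}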
3855
efd3be9d 3856static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
319a4f4b 3857 size_t n_event_queue, m, n_event_max;
efd3be9d 3858 int64_t min_priority = threshold;
3859 bool something_new = false;
798445ab 3860 int r;
c45a5a74 3861
efd3be9d 3862 assert(e);
3863 assert(ret_min_priority);
6a0f1f6d 3864
8b9708d1 3865 n_event_queue = MAX(e->n_sources, 1u);
319a4f4b 3866 if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
5cddd924 3867 return -ENOMEM;
fd38203a 3868
319a4f4b 3869 n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
3870
97ef5391 3871 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3872 if (e->inotify_data_buffered)
798445ab 3873 timeout = 0;
97ef5391 3874
8b9708d1 3875 for (;;) {
319a4f4b 3876 r = epoll_wait_usec(
3877 e->epoll_fd,
3878 e->event_queue,
3879 n_event_max,
3880 timeout);
798445ab 3881 if (r < 0)
efd3be9d 3882 return r;
c45a5a74 3883
8b9708d1 3884 m = (size_t) r;
3885
319a4f4b 3886 if (m < n_event_max)
8b9708d1 3887 break;
3888
319a4f4b 3889 if (n_event_max >= n_event_queue * 10)
8b9708d1 3890 break;
3891
319a4f4b 3892 if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
8b9708d1 3893 return -ENOMEM;
3894
319a4f4b 3895 n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
798445ab 3896 timeout = 0;
da7e457c 3897 }
fd38203a 3898
efd3be9d 3899 /* Set the timestamp only the first time this is called. */
3900 if (threshold == INT64_MAX)
3901 triple_timestamp_get(&e->timestamp);
fd38203a 3902
8b9708d1 3903 for (size_t i = 0; i < m; i++) {
fd38203a 3904
5cddd924 3905 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3906 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
9da4cb2b 3907 else {
5cddd924 3908 WakeupType *t = e->event_queue[i].data.ptr;
9da4cb2b 3909
3910 switch (*t) {
3911
f8f3f926 3912 case WAKEUP_EVENT_SOURCE: {
5cddd924 3913 sd_event_source *s = e->event_queue[i].data.ptr;
f8f3f926 3914
3915 assert(s);
3916
efd3be9d 3917 if (s->priority > threshold)
3918 continue;
3919
3920 min_priority = MIN(min_priority, s->priority);
3921
f8f3f926 3922 switch (s->type) {
3923
3924 case SOURCE_IO:
5cddd924 3925 r = process_io(e, s, e->event_queue[i].events);
f8f3f926 3926 break;
3927
3928 case SOURCE_CHILD:
5cddd924 3929 r = process_pidfd(e, s, e->event_queue[i].events);
f8f3f926 3930 break;
3931
3932 default:
3933 assert_not_reached("Unexpected event source type");
3934 }
3935
9da4cb2b 3936 break;
f8f3f926 3937 }
fd38203a 3938
9da4cb2b 3939 case WAKEUP_CLOCK_DATA: {
5cddd924 3940 struct clock_data *d = e->event_queue[i].data.ptr;
f8f3f926 3941
3942 assert(d);
3943
5cddd924 3944 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
9da4cb2b 3945 break;
3946 }
3947
3948 case WAKEUP_SIGNAL_DATA:
efd3be9d 3949 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
9da4cb2b 3950 break;
3951
97ef5391 3952 case WAKEUP_INOTIFY_DATA:
efd3be9d 3953 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
97ef5391 3954 break;
3955
9da4cb2b 3956 default:
3957 assert_not_reached("Invalid wake-up pointer");
3958 }
3959 }
efd3be9d 3960 if (r < 0)
3961 return r;
3962 if (r > 0)
3963 something_new = true;
3964 }
3965
3966 *ret_min_priority = min_priority;
3967 return something_new;
3968}
3969
3970_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3971 int r;
3972
3973 assert_return(e, -EINVAL);
3974 assert_return(e = event_resolve(e), -ENOPKG);
3975 assert_return(!event_pid_changed(e), -ECHILD);
3976 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3977 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3978
3979 if (e->exit_requested) {
3980 e->state = SD_EVENT_PENDING;
3981 return 1;
3982 }
3983
3984 for (int64_t threshold = INT64_MAX; ; threshold--) {
3985 int64_t epoll_min_priority, child_min_priority;
3986
3987 /* There is a possibility that new epoll (especially IO) and child events are
3988 * triggered just after the process_epoll() call but before process_child(), and the new IO
3989 * events may have a higher priority than the child events. To salvage these events,
3990 * let's call epoll_wait() again, but accept only events with a higher priority than the
3991 * previous ones. See issue https://github.com/systemd/systemd/issues/18190 and comments
3992 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
3993 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */
3994
3995 r = process_epoll(e, timeout, threshold, &epoll_min_priority);
3996 if (r == -EINTR) {
3997 e->state = SD_EVENT_PENDING;
3998 return 1;
3999 }
4000 if (r < 0)
4001 goto finish;
4002 if (r == 0 && threshold < INT64_MAX)
4003 /* No new epoll event. */
4004 break;
4005
4006 r = process_child(e, threshold, &child_min_priority);
fd38203a 4007 if (r < 0)
da7e457c 4008 goto finish;
efd3be9d 4009 if (r == 0)
4010 /* No new child event. */
4011 break;
4012
4013 threshold = MIN(epoll_min_priority, child_min_priority);
4014 if (threshold == INT64_MIN)
4015 break;
4016
4017 timeout = 0;
fd38203a 4018 }
4019
cde93897 4020 r = process_watchdog(e);
4021 if (r < 0)
4022 goto finish;
4023
6a0f1f6d 4024 r = process_timer(e, e->timestamp.realtime, &e->realtime);
4025 if (r < 0)
4026 goto finish;
4027
e475d10c 4028 r = process_timer(e, e->timestamp.boottime, &e->boottime);
a8548816 4029 if (r < 0)
4030 goto finish;
4031
6a0f1f6d 4032 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
4033 if (r < 0)
4034 goto finish;
4035
4036 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
fd38203a 4037 if (r < 0)
da7e457c 4038 goto finish;
fd38203a 4039
e475d10c 4040 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
fd38203a 4041 if (r < 0)
da7e457c 4042 goto finish;
fd38203a 4043
97ef5391 4044 r = process_inotify(e);
4045 if (r < 0)
4046 goto finish;
4047
c45a5a74 4048 if (event_next_pending(e)) {
4049 e->state = SD_EVENT_PENDING;
c45a5a74 4050 return 1;
da7e457c 4051 }
4052
c45a5a74 4053 r = 0;
fd38203a 4054
da7e457c 4055finish:
2b0c9ef7 4056 e->state = SD_EVENT_INITIAL;
da7e457c 4057
4058 return r;
fd38203a 4059}
4060
c45a5a74 4061_public_ int sd_event_dispatch(sd_event *e) {
4062 sd_event_source *p;
4063 int r;
4064
4065 assert_return(e, -EINVAL);
b937d761 4066 assert_return(e = event_resolve(e), -ENOPKG);
c45a5a74 4067 assert_return(!event_pid_changed(e), -ECHILD);
4068 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4069 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
4070
4071 if (e->exit_requested)
4072 return dispatch_exit(e);
4073
4074 p = event_next_pending(e);
4075 if (p) {
f814c871 4076 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
c45a5a74 4077
4078 e->state = SD_EVENT_RUNNING;
4079 r = source_dispatch(p);
2b0c9ef7 4080 e->state = SD_EVENT_INITIAL;
c45a5a74 4081 return r;
4082 }
4083
2b0c9ef7 4084 e->state = SD_EVENT_INITIAL;
c45a5a74 4085
4086 return 1;
4087}
4088
34b87517 4089static void event_log_delays(sd_event *e) {
442ac269 4090 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
4091 size_t l, i;
34b87517 4092
442ac269 4093 p = b;
4094 l = sizeof(b);
4095 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
4096 l = strpcpyf(&p, l, "%u ", e->delays[i]);
34b87517 4097 e->delays[i] = 0;
4098 }
442ac269 4099 log_debug("Event loop iterations: %s", b);
34b87517 4100}
4101
c45a5a74 4102_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
4103 int r;
4104
4105 assert_return(e, -EINVAL);
b937d761 4106 assert_return(e = event_resolve(e), -ENOPKG);
c45a5a74 4107 assert_return(!event_pid_changed(e), -ECHILD);
4108 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 4109 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
c45a5a74 4110
e6a7bee5 4111 if (e->profile_delays && e->last_run_usec != 0) {
34b87517 4112 usec_t this_run;
4113 unsigned l;
4114
4115 this_run = now(CLOCK_MONOTONIC);
4116
e6a7bee5 4117 l = u64log2(this_run - e->last_run_usec);
cb9d621e 4118 assert(l < ELEMENTSOF(e->delays));
34b87517 4119 e->delays[l]++;
4120
e6a7bee5 4121 if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
34b87517 4122 event_log_delays(e);
e6a7bee5 4123 e->last_log_usec = this_run;
34b87517 4124 }
4125 }
4126
f814c871 4127 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
4128 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
4129
c45a5a74 4130 r = sd_event_prepare(e);
53bac4e0 4131 if (r == 0)
4132 /* There was nothing? Then wait... */
4133 r = sd_event_wait(e, timeout);
c45a5a74 4134
34b87517 4135 if (e->profile_delays)
e6a7bee5 4136 e->last_run_usec = now(CLOCK_MONOTONIC);
34b87517 4137
02d30981 4138 if (r > 0) {
53bac4e0 4139 /* There's something now, then let's dispatch it */
02d30981 4140 r = sd_event_dispatch(e);
4141 if (r < 0)
4142 return r;
53bac4e0 4143
4144 return 1;
4145 }
4146
4147 return r;
c45a5a74 4148}
4149
f7262a9f 4150_public_ int sd_event_loop(sd_event *e) {
fd38203a 4151 int r;
4152
da7e457c 4153 assert_return(e, -EINVAL);
b937d761 4154 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 4155 assert_return(!event_pid_changed(e), -ECHILD);
2b0c9ef7 4156 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 4157
f814c871 4158 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
fd38203a 4159
da7e457c 4160 while (e->state != SD_EVENT_FINISHED) {
f5fbe71d 4161 r = sd_event_run(e, UINT64_MAX);
fd38203a 4162 if (r < 0)
30dd293c 4163 return r;
fd38203a 4164 }
4165
30dd293c 4166 return e->exit_code;
fd38203a 4167}
4168
9b364545 4169_public_ int sd_event_get_fd(sd_event *e) {
9b364545 4170 assert_return(e, -EINVAL);
b937d761 4171 assert_return(e = event_resolve(e), -ENOPKG);
9b364545 4172 assert_return(!event_pid_changed(e), -ECHILD);
4173
4174 return e->epoll_fd;
4175}
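The epoll fd returned here is what allows embedding this loop into a foreign one: poll the fd, and when it fires run a non-blocking iteration. A sketch of that pattern (error handling abbreviated):

#include <errno.h>
#include <poll.h>
#include <systemd/sd-event.h>

/* Drive an sd-event loop from an external poll()-based main loop. */
static int drive(sd_event *e) {
        struct pollfd p = {
                .fd = sd_event_get_fd(e),
                .events = POLLIN,
        };

        while (sd_event_get_state(e) != SD_EVENT_FINISHED) {
                if (poll(&p, 1, -1) < 0)     /* wait until the loop has work */
                        return -errno;

                int r = sd_event_run(e, 0);  /* process it without blocking */
                if (r < 0)
                        return r;
        }

        return 0;
}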
4176
f7262a9f 4177_public_ int sd_event_get_state(sd_event *e) {
da7e457c 4178 assert_return(e, -EINVAL);
b937d761 4179 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 4180 assert_return(!event_pid_changed(e), -ECHILD);
4181
4182 return e->state;
4183}
4184
6203e07a 4185_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
da7e457c 4186 assert_return(e, -EINVAL);
b937d761 4187 assert_return(e = event_resolve(e), -ENOPKG);
6203e07a 4188 assert_return(code, -EINVAL);
da7e457c 4189 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 4190
6203e07a 4191 if (!e->exit_requested)
4192 return -ENODATA;
4193
4194 *code = e->exit_code;
4195 return 0;
fd38203a 4196}
4197
6203e07a 4198_public_ int sd_event_exit(sd_event *e, int code) {
da7e457c 4199 assert_return(e, -EINVAL);
b937d761 4200 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 4201 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4202 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 4203
6203e07a 4204 e->exit_requested = true;
4205 e->exit_code = code;
4206
fd38203a 4207 return 0;
4208}
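A common use of sd_event_exit() is a clean shutdown on SIGTERM; the exit sources then run from dispatch_exit() above on the next iteration. A sketch (the wiring assumes the caller blocked SIGTERM first, as sd_event_add_signal() requires):

#include <signal.h>
#include <sys/signalfd.h>
#include <systemd/sd-event.h>

static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        /* Request a clean shutdown; exit sources run before the loop returns. */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

/* SIGTERM must already be blocked (e.g. via sigprocmask()), otherwise
 * sd_event_add_signal() refuses with -EBUSY. */
static int hook_sigterm(sd_event *e) {
        return sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
}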
46e8c825 4209
6a0f1f6d 4210_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
46e8c825 4211 assert_return(e, -EINVAL);
b937d761 4212 assert_return(e = event_resolve(e), -ENOPKG);
46e8c825 4213 assert_return(usec, -EINVAL);
46e8c825 4214 assert_return(!event_pid_changed(e), -ECHILD);
4215
e475d10c 4216 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
4217 return -EOPNOTSUPP;
4218
4219 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
4220 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
4221 * the purpose of getting the time this doesn't matter. */
3411372e 4222 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
4223 return -EOPNOTSUPP;
4224
e475d10c 4225 if (!triple_timestamp_is_set(&e->timestamp)) {
15c689d7 4226 /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
38a03f06 4227 *usec = now(clock);
4228 return 1;
4229 }
46e8c825 4230
e475d10c 4231 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
46e8c825 4232 return 0;
4233}
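Because this returns the timestamp of the current iteration's wake-up (or falls back to now() and returns 1 before the first iteration), it is the natural base for relative deadlines; a sketch with an arbitrary five-second offset:

#include <stdint.h>
#include <time.h>
#include <systemd/sd-event.h>

/* Arm `cb` five seconds after the moment this iteration woke up, so every
 * source armed in the same iteration shares a single clock read. */
static int arm_in_5s(sd_event *e, sd_event_time_handler_t cb, void *userdata) {
        uint64_t t;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &t);
        if (r < 0)
                return r;

        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
                                 t + UINT64_C(5000000), 0, cb, userdata);
}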
afc6adb5 4234
4235_public_ int sd_event_default(sd_event **ret) {
39883f62 4236 sd_event *e = NULL;
afc6adb5 4237 int r;
4238
4239 if (!ret)
4240 return !!default_event;
4241
4242 if (default_event) {
4243 *ret = sd_event_ref(default_event);
4244 return 0;
4245 }
4246
4247 r = sd_event_new(&e);
4248 if (r < 0)
4249 return r;
4250
4251 e->default_event_ptr = &default_event;
4252 e->tid = gettid();
4253 default_event = e;
4254
4255 *ret = e;
4256 return 1;
4257}
4258
4259_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
4260 assert_return(e, -EINVAL);
b937d761 4261 assert_return(e = event_resolve(e), -ENOPKG);
afc6adb5 4262 assert_return(tid, -EINVAL);
76b54375 4263 assert_return(!event_pid_changed(e), -ECHILD);
afc6adb5 4264
76b54375 4265 if (e->tid != 0) {
4266 *tid = e->tid;
4267 return 0;
4268 }
4269
4270 return -ENXIO;
afc6adb5 4271}
cde93897 4272
4273_public_ int sd_event_set_watchdog(sd_event *e, int b) {
4274 int r;
4275
4276 assert_return(e, -EINVAL);
b937d761 4277 assert_return(e = event_resolve(e), -ENOPKG);
8f726607 4278 assert_return(!event_pid_changed(e), -ECHILD);
cde93897 4279
4280 if (e->watchdog == !!b)
4281 return e->watchdog;
4282
4283 if (b) {
09812eb7 4284 r = sd_watchdog_enabled(false, &e->watchdog_period);
4285 if (r <= 0)
cde93897 4286 return r;
cde93897 4287
4288 /* Issue first ping immediately */
4289 sd_notify(false, "WATCHDOG=1");
4290 e->watchdog_last = now(CLOCK_MONOTONIC);
4291
4292 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
4293 if (e->watchdog_fd < 0)
4294 return -errno;
4295
4296 r = arm_watchdog(e);
4297 if (r < 0)
4298 goto fail;
4299
1eac7948 4300 struct epoll_event ev = {
a82f89aa 4301 .events = EPOLLIN,
4302 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
4303 };
cde93897 4304
15c689d7 4305 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
cde93897 4306 r = -errno;
4307 goto fail;
4308 }
4309
4310 } else {
4311 if (e->watchdog_fd >= 0) {
5a795bff 4312 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
03e334a1 4313 e->watchdog_fd = safe_close(e->watchdog_fd);
cde93897 4314 }
4315 }
4316
4317 e->watchdog = !!b;
4318 return e->watchdog;
4319
4320fail:
03e334a1 4321 e->watchdog_fd = safe_close(e->watchdog_fd);
cde93897 4322 return r;
4323}
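From the service side all of the above reduces to a single opt-in call; a sketch assuming the unit sets, say, WatchdogSec=30s:

#include <stdbool.h>
#include <systemd/sd-event.h>

/* With WatchdogSec=30s the manager exports WATCHDOG_USEC=30000000; after this
 * call the loop sends "WATCHDOG=1" on its own, between period/2 and 3/4. */
static int enable_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;   /* setup failed */
        if (r == 0)
                return 0;   /* manager did not request a watchdog; nothing to do */

        return 1;           /* keep-alive pings are now automatic */
}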
8f726607 4324
4325_public_ int sd_event_get_watchdog(sd_event *e) {
4326 assert_return(e, -EINVAL);
b937d761 4327 assert_return(e = event_resolve(e), -ENOPKG);
8f726607 4328 assert_return(!event_pid_changed(e), -ECHILD);
4329
4330 return e->watchdog;
4331}
60a3b1e1 4332
4333_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
4334 assert_return(e, -EINVAL);
b937d761 4335 assert_return(e = event_resolve(e), -ENOPKG);
60a3b1e1 4336 assert_return(!event_pid_changed(e), -ECHILD);
4337
4338 *ret = e->iteration;
4339 return 0;
4340}
15723a1d 4341
4342_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
4343 assert_return(s, -EINVAL);
4344
4345 s->destroy_callback = callback;
4346 return 0;
4347}
4348
4349_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
4350 assert_return(s, -EINVAL);
4351
4352 if (ret)
4353 *ret = s->destroy_callback;
4354
4355 return !!s->destroy_callback;
4356}
2382c936 4357
4358_public_ int sd_event_source_get_floating(sd_event_source *s) {
4359 assert_return(s, -EINVAL);
4360
4361 return s->floating;
4362}
4363
4364_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
4365 assert_return(s, -EINVAL);
4366
4367 if (s->floating == !!b)
4368 return 0;
4369
4370 if (!s->event) /* Already disconnected */
4371 return -ESTALE;
4372
4373 s->floating = b;
4374
4375 if (b) {
4376 sd_event_source_ref(s);
4377 sd_event_unref(s->event);
4378 } else {
4379 sd_event_ref(s->event);
4380 sd_event_source_unref(s);
4381 }
4382
4383 return 1;
4384}
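Floating sources invert the reference direction: the loop keeps the source alive rather than the caller, which is what makes fire-and-forget sources possible. A sketch with an illustrative defer callback:

#include <stdbool.h>
#include <systemd/sd-event.h>

static int on_defer(sd_event_source *s, void *userdata) {
        return 0;   /* illustrative no-op callback */
}

/* Register a source we keep no reference to: once floating, its lifetime
 * is bound to the event loop instead of to the caller. */
static int add_fire_and_forget(sd_event *e) {
        sd_event_source *s = NULL;
        int r;

        r = sd_event_add_defer(e, &s, on_defer, NULL);
        if (r < 0)
                return r;

        r = sd_event_source_set_floating(s, true);
        sd_event_source_unref(s);   /* the loop now owns the source */
        return r;
}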
b778cba4 4385
4386_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
4387 assert_return(s, -EINVAL);
4388 assert_return(s->type != SOURCE_EXIT, -EDOM);
4389
4390 return s->exit_on_failure;
4391}
4392
4393_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
4394 assert_return(s, -EINVAL);
4395 assert_return(s->type != SOURCE_EXIT, -EDOM);
4396
4397 if (s->exit_on_failure == !!b)
4398 return 0;
4399
4400 s->exit_on_failure = b;
4401 return 1;
4402}
b6d5481b 4403
4404_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
4405 int r;
4406
4407 assert_return(s, -EINVAL);
4408
4409 /* Turning on ratelimiting for event source types that don't support it is a loggable offense:
4410 * doing so is a programming error. */
4411 assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);
4412
4413 /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
4414 * non-ratelimited. */
4415 r = event_source_leave_ratelimit(s);
4416 if (r < 0)
4417 return r;
4418
4419 s->rate_limit = (RateLimit) { interval, burst };
4420 return 0;
4421}
4422
4423_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
4424 assert_return(s, -EINVAL);
4425
4426 /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
4427 * don't use assert_return(). Unlike turning on ratelimiting, it's not really a programming error. */
4428 if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
4429 return -EDOM;
4430
4431 if (!ratelimit_configured(&s->rate_limit))
4432 return -ENOEXEC;
4433
4434 if (ret_interval)
4435 *ret_interval = s->rate_limit.interval;
4436 if (ret_burst)
4437 *ret_burst = s->rate_limit.burst;
4438
4439 return 0;
4440}
4441
4442_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
4443 assert_return(s, -EINVAL);
4444
4445 if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
4446 return false;
4447
4448 if (!ratelimit_configured(&s->rate_limit))
4449 return false;
4450
4451 return s->ratelimited;
4452}
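A closing usage sketch for the ratelimit API above, with arbitrary values: at most 10 dispatches per second; once the source trips the limit it is taken offline (see event_source_is_offline() at the top of this section) and resumes when the interval expires.

#include <stdint.h>
#include <systemd/sd-event.h>

/* Throttle an existing source `s` to 10 dispatches per 1s window. */
static int throttle(sd_event_source *s) {
        uint64_t interval;
        unsigned burst;
        int r;

        r = sd_event_source_set_ratelimit(s, UINT64_C(1000000), 10);
        if (r < 0)
                return r;

        /* Read the configuration back, mostly to demonstrate the getter. */
        r = sd_event_source_get_ratelimit(s, &interval, &burst);
        if (r < 0)
                return r;

        return sd_event_source_is_ratelimited(s);   /* 0 here: state freshly reset */
}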