/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}
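
/* Illustrative aside (not part of the original file): how the two child-watch strategies differ. A child
 * source with options == WEXITED and a valid pidfd is driven purely by the pidfd becoming
 * EPOLLIN-readable; anything else (WSTOPPED/WCONTINUED, or no pidfd) falls back to the per-priority
 * SIGCHLD signalfd plus waitid() processing. Either way the caller-visible handler has this shape:
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_code is CLD_EXITED/CLD_KILLED/…, si->si_status the exit status or signal
 *             return 0;
 *     }
 */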

static bool event_source_is_online(sd_event_source *s) {
        assert(s);
        return s->enabled != SD_EVENT_OFF && !s->ratelimited;
}

static bool event_source_is_offline(sd_event_source *s) {
        assert(s);
        return s->enabled == SD_EVENT_OFF || s->ratelimited;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t)                 \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM)

#define EVENT_SOURCE_CAN_RATE_LIMIT(t)          \
        IN_SET((t),                             \
               SOURCE_IO,                       \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM,      \
               SOURCE_SIGNAL,                   \
               SOURCE_DEFER,                    \
               SOURCE_INOTIFY)

/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
 * Time sources and ratelimited sources can be passed, so effectively this is the same as the
 * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_online_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run_usec, last_log_usec;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static usec_t time_event_source_next(const sd_event_source *s) {
        assert(s);

        /* We have two kinds of event sources that have elapsation times associated with them: the actual
         * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
         * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are
         * looking at here. */

        if (s->ratelimited) { /* If rate-limited the next elapsation is when the ratelimit time window ends */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Otherwise this must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return s->time.next;

        return USEC_INFINITY;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        assert(s);

        if (s->ratelimited) { /* For ratelimited sources the earliest and the latest time shall be the same,
                               * as we should avoid stacking additional inaccuracy on top of the ratelimit
                               * time window */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return usec_add(s->time.next, s->time.accuracy);

        return USEC_INFINITY;
}

static bool event_source_timer_candidate(const sd_event_source *s) {
        assert(s);

        /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark
         * them pending) or which are currently ratelimited (i.e. where it's worth leaving the ratelimited
         * state) */
        return !s->pending || s->ratelimited;
}

static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
        r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
        if (r != 0)
                return r;

        /* Order by time */
        return CMP(time_func(x), time_func(y));
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_next);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_latest);
}
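
/* A hedged aside (not part of the original file): why two prioqs per clock. d->earliest orders sources by
 * the earliest moment they may fire (time.next), d->latest by the latest moment they must fire
 * (time.next + accuracy). When arming the timerfd the loop can then pick a wakeup between the front
 * elements of both queues, coalescing wakeups. For example, assuming two invented sources
 *
 *     A: next = 100ms, accuracy = 50ms   ->  window [100ms, 150ms]
 *     B: next = 120ms, accuracy = 250ms  ->  window [120ms, 370ms]
 *
 * a single wakeup anywhere in [120ms, 150ms] can serve both, instead of two separate CPU wakeups. */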

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 … 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
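
/* Illustrative usage sketch (not part of this file): creating a loop, adding a source and running it.
 * Error handling abbreviated; on_io and some_fd are hypothetical names supplied by the caller.
 *
 *     sd_event *e = NULL;
 *     assert_se(sd_event_new(&e) >= 0);
 *     assert_se(sd_event_add_io(e, NULL, some_fd, EPOLLIN, on_io, NULL) >= 0);
 *     (void) sd_event_loop(e);          // dispatches until sd_event_exit() is called
 *     e = sd_event_unref(e);
 */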

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data object. If the signal mask of the object
         * becomes empty that way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is now all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_online_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            event_source_is_online(e->signal_sources[sig]))
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered
         * properly again. */

        if (s->ratelimited)
                d = &s->event->monotonic;
        else if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(d = event_get_clock_data(s->event, s->type));
        else
                return; /* no-op for an event source which is neither a timer nor ratelimited. */

        prioq_reshuffle(d->earliest, s, &s->earliest_index);
        prioq_reshuffle(d->latest, s, &s->latest_index);
        d->needs_rearm = true;
}

static void event_source_time_prioq_remove(
                sd_event_source *s,
                struct clock_data *d) {

        assert(s);
        assert(d);

        prioq_remove(d->earliest, s, &s->earliest_index);
        prioq_remove(d->latest, s, &s->latest_index);
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                /* Only remove this event source from the time event source here if it is not ratelimited. If
                 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
                 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */

                if (!s->ratelimited) {
                        struct clock_data *d;
                        assert_se(d = event_get_clock_data(s->event, s->type));
                        event_source_time_prioq_remove(s, d);
                }

                break;

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (event_source_is_online(s)) {
                                assert(s->event->n_online_child_sources > 0);
                                s->event->n_online_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the
                         * inode continues to be watched. That's because inotify doesn't really have an API
                         * for that: we can only change watch masks with access to the original inode either
                         * by fd or by path. But paths aren't stable, and keeping an O_PATH fd open all the
                         * time would mean wasting an fd continuously and keeping the mount busy which we
                         * can't really do. We could reconstruct the original inode from
                         * /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed there), but
                         * given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way,
                         * leave the mask set, even if it is not minimized now, and ignore all events we
                         * aren't interested in anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached();
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        if (s->ratelimited)
                event_source_time_prioq_remove(s, &s->event->monotonic);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static sd_event_source* source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        return mfree(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 1;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
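
/* Illustrative usage sketch (not part of this file): echoing data that becomes readable on a socket.
 * sock_fd and on_readable are hypothetical names supplied by the caller.
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[4096];
 *             ssize_t n = read(fd, buf, sizeof(buf));
 *             if (n <= 0)
 *                     return sd_event_source_set_enabled(s, SD_EVENT_OFF);
 *             (void) write(STDOUT_FILENO, buf, (size_t) n);
 *             return 0;
 *     }
 *
 *     assert_se(sd_event_add_io(e, NULL, sock_fd, EPOLLIN, on_readable, NULL) >= 0);
 */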

static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
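
/* A worked example (not from this file, values invented): if XORing the boot ID words modulo
 * USEC_PER_MINUTE yields a perturb of 13.37s, a timer that may fire anywhere within a one-minute window
 * gets aligned to second 13.37 of the minute. Every loop on this machine picks the same offset, so their
 * wakeups coalesce, while a machine with a different boot ID lands elsewhere in the minute, spreading
 * load across a fleet with synced clocks. */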

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        assert(d);

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}

static int event_source_time_prioq_put(
                sd_event_source *s,
                struct clock_data *d) {

        int r;

        assert(s);
        assert(d);
        assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

        r = prioq_put(d->earliest, s, &s->earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->latest_index);
        if (r < 0) {
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
                s->earliest_index = PRIOQ_IDX_NULL;
                return r;
        }

        d->needs_rearm = true;
        return 0;
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != UINT64_MAX, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        assert_se(d = event_get_clock_data(e, type));

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = event_source_time_prioq_put(s, d);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
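
/* Illustrative usage sketch (not part of this file): a one-shot timer five seconds from now, with the
 * default 250ms accuracy window. on_timer is a hypothetical caller-supplied handler.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             log_info("timer elapsed at %" PRIu64, usec);
 *             return 0;
 *     }
 *
 *     uint64_t now_usec;
 *     assert_se(sd_event_now(e, CLOCK_MONOTONIC, &now_usec) >= 0);
 *     assert_se(sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + 5 * USEC_PER_SEC, 0, on_timer, NULL) >= 0);
 *
 * sd_event_add_time_relative() below wraps exactly this now+offset pattern, including overflow checking. */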

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
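
/* Illustrative usage sketch (not part of this file): graceful shutdown on SIGTERM. The signal must be
 * blocked before the source is added, otherwise -EBUSY is returned (see the signal_is_blocked() check
 * above).
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
 *     // Passing a NULL callback falls back to signal_exit_callback(), which ends the loop by
 *     // calling sd_event_exit() with PTR_TO_INT(userdata) as the exit code.
 *     assert_se(sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL) >= 0);
 */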

static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once
         * with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;

        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}
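
/* Illustrative usage sketch (not part of this file): fork a child and get notified when it exits.
 * SIGCHLD must be blocked in all threads before the first child source is added.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_info("child " PID_FMT " exited, status=%i", si->si_pid, si->si_status);
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);
 *     assert_se(sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL) >= 0);
 */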

_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
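
/* Illustrative usage sketch (not part of this file): run a piece of work once, from the next loop
 * iteration, rather than synchronously. Since the source is created with SD_EVENT_ONESHOT and is
 * immediately marked pending, the handler fires exactly once and the source is then disabled.
 *
 *     static int do_deferred(sd_event_source *s, void *userdata) {
 *             log_debug("running deferred work");
 *             return 0;
 *     }
 *
 *     assert_se(sd_event_add_defer(e, NULL, do_deferred, NULL) >= 0);
 */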

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
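
/* Illustrative usage sketch (not part of this file): cleanup that runs after sd_event_exit() has been
 * called, before sd_event_loop() returns. Exit sources are dispatched in priority order once the loop
 * begins to exit, which makes them a good place to tear down state that other handlers may still use
 * while the loop is running.
 *
 *     static int on_exit_source(sd_event_source *s, void *userdata) {
 *             log_debug("loop is exiting, cleaning up");
 *             return 0;
 *     }
 *
 *     assert_se(sd_event_add_exit(e, NULL, on_exit_source, NULL) >= 0);
 */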

static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inotify_data(
                sd_event *e,
                struct inotify_data *d) {

        assert(e);

        /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch
         * any inode with it anymore, which in turn happens if no event source of this priority is interested
         * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC
         * (under the expectation that the GC is called again once the counter is decremented). */

        if (!d)
                return;

        if (!hashmap_isempty(d->inodes))
                return;

        if (d->n_busy > 0)
                return;

        event_free_inotify_data(e, d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        event_gc_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
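
/* Illustrative note (not from the original source): with two sources on the same inode, one watching
 * IN_CREATE|IN_EXCL_UNLINK and one watching IN_DELETE, the OR'ed mask is
 * IN_CREATE|IN_DELETE|IN_EXCL_UNLINK; since not every source set IN_EXCL_UNLINK the flag is dropped,
 * and the kernel watch ends up as IN_CREATE|IN_DELETE. Events a particular source didn't ask for are
 * then filtered out client-side, in event_inotify_data_process(). */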

static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd; /* inotify_add_watch_fd() returns an errno-style negative error itself */

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd); /* remove from the inotify fd, not the inode fd */
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 1;
}

static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int event_add_inotify_fd_internal(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                bool donate,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        _cleanup_close_ int donated_fd = donate ? fd : -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = inotify_exit_callback;

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_gc_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that the priority of the
         * event source can still be changed until then; for that we need the original inode. */
        if (inode_data->fd < 0) {
                if (donated_fd >= 0)
                        inode_data->fd = TAKE_FD(donated_fd);
                else {
                        inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
                        if (inode_data->fd < 0) {
                                r = -errno;
                                event_gc_inode_data(e, inode_data);
                                return r;
                        }
                }

                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_inotify_fd(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
}

_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */
        int fd, r;

        assert_return(path, -EINVAL);

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;

        return r;
}
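
/* Usage sketch (hypothetical caller code, not part of this file): watch a directory for newly created
 * or moved-in files via the API above. The names used below are illustrative only.
 *
 *     static int on_change(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_info("inotify event: mask=%x name=%s", ev->mask, ev->len > 0 ? ev->name : "-");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(event, &source, "/run/mydir", IN_CREATE|IN_MOVED_TO, on_change, NULL);
 *     if (r < 0)
 *             return r;
 */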

static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (event_source_is_offline(s)) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}

_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (event_source_is_online(s)) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the
                 * priority anymore. Note that we close any fds when entering the next event loop iteration,
                 * i.e. for inotify events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        event_source_pp_prioq_reshuffle(s);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
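
/* Usage note (illustration, not from the original source): priorities are plain int64_t values,
 * smaller meaning "dispatched earlier". A caller might bump a source ahead of the default like this:
 *
 *     r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_IMPORTANT);
 *
 * For SOURCE_INOTIFY this only works until the first event loop iteration after creation, as
 * explained above. */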

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (ret)
                *ret = s->enabled;

        return s->enabled != SD_EVENT_OFF;
}

static int event_source_offline(
                sd_event_source *s,
                int enabled,
                bool ratelimited) {

        bool was_offline;
        int r;

        assert(s);
        assert(enabled == SD_EVENT_OFF || ratelimited);

        /* Unset the pending flag when this event source is disabled */
        if (s->enabled != SD_EVENT_OFF &&
            enabled == SD_EVENT_OFF &&
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        was_offline = event_source_is_offline(s);
        s->enabled = enabled;
        s->ratelimited = ratelimited;

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_SIGNAL:
                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                break;

        case SOURCE_CHILD:
                if (!was_offline) {
                        assert(s->event->n_online_child_sources > 0);
                        s->event->n_online_child_sources--;
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached();
        }

        /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
        event_source_time_prioq_reshuffle(s);

        return 1;
}

static int event_source_online(
                sd_event_source *s,
                int enabled,
                bool ratelimited) {

        bool was_online;
        int r;

        assert(s);
        assert(enabled != SD_EVENT_OFF || !ratelimited);

        /* Unset the pending flag when this event source is enabled */
        if (s->enabled == SD_EVENT_OFF &&
            enabled != SD_EVENT_OFF &&
            !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        /* Are we really ready for onlining? */
        if (enabled == SD_EVENT_OFF || ratelimited) {
                /* Nope, we are not ready for onlining, then just update the precise state and exit */
                s->enabled = enabled;
                s->ratelimited = ratelimited;
                return 0;
        }

        was_online = event_source_is_online(s);

        switch (s->type) {
        case SOURCE_IO:
                r = source_io_register(s, enabled, s->io.events);
                if (r < 0)
                        return r;
                break;

        case SOURCE_SIGNAL:
                r = event_make_signal_data(s->event, s->signal.sig, NULL);
                if (r < 0) {
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        return r;
                }

                break;

        case SOURCE_CHILD:
                if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                        /* yes, we have a pidfd */

                        r = source_child_pidfd_register(s, enabled);
                        if (r < 0)
                                return r;
                } else {
                        /* no pidfd, or we're watching for something other than WEXITED */

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                }

                if (!was_online)
                        s->event->n_online_child_sources++;
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
        case SOURCE_EXIT:
        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached();
        }

        s->enabled = enabled;
        s->ratelimited = ratelimited;

        /* Non-failing operations below */
        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
        event_source_time_prioq_reshuffle(s);

        return 1;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m) /* No change? */
                return 0;

        if (m == SD_EVENT_OFF)
                r = event_source_offline(s, m, s->ratelimited);
        else {
                if (s->enabled != SD_EVENT_OFF) {
                        /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
                         * event source is already enabled after all. */
                        s->enabled = m;
                        return 0;
                }

                r = event_source_online(s, m, s->ratelimited);
        }
        if (r < 0)
                return r;

        event_source_pp_prioq_reshuffle(s);
        return 0;
}
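
/* Usage sketch (hypothetical caller code): a common pattern is to park a source with SD_EVENT_OFF
 * while it is not needed and re-arm it as one-shot later:
 *
 *     (void) sd_event_source_set_enabled(source, SD_EVENT_OFF);
 *     ...
 *     r = sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
 */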

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
        usec_t t;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);

        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
        if (r < 0)
                return r;

        usec = usec_add(t, usec);
        if (usec == USEC_INFINITY)
                return -EOVERFLOW;

        return sd_event_source_set_time(s, usec);
}
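
/* Usage sketch (hypothetical caller code): re-arming an existing timer source 5 seconds from now on
 * its own clock, instead of computing an absolute deadline manually with sd_event_now():
 *
 *     r = sd_event_source_set_time_relative(timer_source, 5 * USEC_PER_SEC);
 */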

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != UINT64_MAX, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd;
}

_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);

        /* If we have already seen indication that the process exited, refuse sending a signal early. This
         * way we can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
         * available. */
        if (s->child.exited)
                return -ESRCH;

        if (s->child.pidfd >= 0) {
                siginfo_t copy;

                /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
                 * structure here */
                if (si)
                        copy = *si;

                if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
                        /* Let's propagate the error only if the system call is not implemented or prohibited */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        return 0;
        }

        /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
         * this here. */
        if (flags != 0)
                return -EOPNOTSUPP;

        if (si) {
                /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
                siginfo_t copy = *si;

                if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
                        return -errno;
        } else if (kill(s->child.pid, sig) < 0)
                return -errno;

        return 0;
}
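
/* Usage sketch (hypothetical caller code): asking a tracked child to terminate. With a pidfd this is
 * race-free even if the PID gets recycled:
 *
 *     r = sd_event_source_send_child_signal(child_source, SIGTERM, NULL, 0);
 *     if (r == -ESRCH)
 *             ... the child already exited, nothing to do ...
 */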

_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd_owned;
}

_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        s->child.pidfd_owned = own;
        return 0;
}

_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        return s->child.process_owned;
}

_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        s->child.process_owned = own;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}

static int event_source_enter_ratelimited(sd_event_source *s) {
        int r;

        assert(s);

        /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
         * the end of the rate limit time window, much as if it was a timer event source. */

        if (s->ratelimited)
                return 0; /* Already ratelimited, this is a NOP hence */

        /* Make sure we can install a CLOCK_MONOTONIC event further down. */
        r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
        if (r < 0)
                return r;

        /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
         * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
         * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));

        /* Now, let's add the event source to the monotonic clock instead */
        r = event_source_time_prioq_put(s, &s->event->monotonic);
        if (r < 0)
                goto fail;

        /* And let's take the event source officially offline */
        r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
        if (r < 0) {
                event_source_time_prioq_remove(s, &s->event->monotonic);
                goto fail;
        }

        event_source_pp_prioq_reshuffle(s);

        log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
        return 0;

fail:
        /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
         * space for it should already be allocated. */
        if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);

        return r;
}
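
/* Usage sketch (hypothetical caller code): the mechanism above is driven by
 * sd_event_source_set_ratelimit(), defined elsewhere in this file. For example, to let an I/O source
 * fire at most 10 times per second and go offline for the rest of each 1s window:
 *
 *     r = sd_event_source_set_ratelimit(source, 1 * USEC_PER_SEC, 10);
 */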

static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {
        int r;

        assert(s);

        if (!s->ratelimited)
                return 0;

        /* Let's take the event source out of the monotonic prioq first. */
        event_source_time_prioq_remove(s, &s->event->monotonic);

        /* Let's then add the event source to its native clock prioq again, if this is a timer event source */
        if (EVENT_SOURCE_IS_TIME(s->type)) {
                r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
                if (r < 0)
                        goto fail;
        }

        /* Let's try to take it online again. */
        r = event_source_online(s, s->enabled, /* ratelimited= */ false);
        if (r < 0) {
                /* Do something roughly sensible when this failed: undo the two prioq ops above */
                if (EVENT_SOURCE_IS_TIME(s->type))
                        event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));

                goto fail;
        }

        event_source_pp_prioq_reshuffle(s);
        ratelimit_reset(&s->rate_limit);

        log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));

        if (run_callback && s->ratelimit_expire_callback) {
                s->dispatching = true;
                r = s->ratelimit_expire_callback(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(s->event, r);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);

                return 1;
        }

        return 0;

fail:
        /* Do something somewhat reasonable when we cannot move an event source out of ratelimited mode:
         * simply put it back in it, maybe we can then process it more successfully next iteration. */
        assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);

        return r;
}

static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
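
/* Worked example (illustration, not from the original source): assume e->perturb is 7s into the
 * minute. If a is second :03 and b is second :50 of the same minute, the first candidate is
 * c = start-of-minute + 7s = :07; c < b and c >= a, so all event loops on this boot wake at :07.
 * Only when the perturbed minute spot misses [a, b] do the 10s/1s/250ms fallbacks run. */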

static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
        if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
        assert(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, so arm to some time long ago instead. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        d->next = t;
        return 0;
}

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (ERRNO_IS_TRANSIENT(errno))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        bool callback_invoked = false;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

                if (!s || time_event_source_next(s) > n)
                        break;

                if (s->ratelimited) {
                        /* This is an event source whose ratelimit window has ended. Let's turn it on
                         * again. */
                        assert(s->ratelimited);

                        r = event_source_leave_ratelimit(s, /* run_callback */ true);
                        if (r < 0)
                                return r;
                        else if (r == 1)
                                callback_invoked = true;

                        continue;
                }

                if (s->enabled == SD_EVENT_OFF || s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                event_source_time_prioq_reshuffle(s);
        }

        return callback_invoked;
}

static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
        int64_t min_priority = threshold;
        bool something_new = false;
        sd_event_source *s;
        int r;

        assert(e);
        assert(ret_min_priority);

        if (!e->need_process_child) {
                *ret_min_priority = min_priority;
                return 0;
        }

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources) {
                assert(s->type == SOURCE_CHILD);

                if (s->priority > threshold)
                        continue;

                if (s->pending)
                        continue;

                if (event_source_is_offline(s))
                        continue;

                if (s->child.exited)
                        continue;

                if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
                        continue;

                zero(s->child.siginfo);
                if (waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
                        return negative_errno();

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (zombie)
                                s->child.exited = true;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                        if (r > 0) {
                                something_new = true;
                                min_priority = MIN(min_priority, s->priority);
                        }
                }
        }

        *ret_min_priority = min_priority;
        return something_new;
}

static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (s->pending)
                return 0;

        if (event_source_is_offline(s))
                return 0;

        if (!EVENT_SOURCE_WATCH_PIDFD(s))
                return 0;

        zero(s->child.siginfo);
        if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
                return -errno;

        if (s->child.siginfo.si_pid == 0)
                return 0;

        if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
                s->child.exited = true;

        return source_set_pending(s, true);
}

static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);
        assert(min_priority);

        /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
         * sure to recheck the children we watch. This is because we only ever dequeue the first signal
         * per priority, and if we dequeue one, a SIGCHLD might still be enqueued behind it and we
         * wouldn't know, but we might have higher priority children we care about, hence we need to
         * check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (ERRNO_IS_TRANSIENT(errno))
                                return 0;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
                if (r > 0 && *min_priority >= s->priority) {
                        *min_priority = s->priority;
                        return 1; /* an event source with smaller priority is queued. */
                }

                return 0;
        }
}

static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        if (d->priority > threshold)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (ERRNO_IS_TRANSIENT(errno))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}

static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}

static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes)
                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (event_source_is_offline(s))
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                } else {
                        struct inode_data *inode_data;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (event_source_is_offline(s))
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;
        }

        return 0;
}

static int process_inotify(sd_event *e) {
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

static int source_dispatch(sd_event_source *s) {
        _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type, here, so that we still know it after the event callback which might
         * invalidate the event. */
        saved_type = s->type;

        /* Similarly, store a reference to the event loop object, so that we can still access it after the
         * callback might have invalidated/disconnected the event source. */
        saved_event = sd_event_ref(s->event);

        /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
        assert(!s->ratelimited);
        if (!ratelimit_below(&s->rate_limit)) {
                r = event_source_enter_ratelimited(s);
                if (r < 0)
                        return r;

                return 1;
        }

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;

                /* If we execute a non-post source, let's mark all post sources as pending. */

                SET_FOREACH(z, s->event->post_sources) {
                        if (event_source_is_offline(z))
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                /* If the inotify callback destroys the event source then this likely means we don't need to
                 * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
                 * free it immediately, then we couldn't drop the event from the inotify event queue without
                 * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
                 * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
                 * explicitly GC it after we are done dropping the inotify event from the buffer. */
                d->n_busy++;
                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
                d->n_busy--;

                /* When no event is pending anymore on this inotify object, then let's drop the event from
                 * the inotify event queue buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                /* Now we don't want to access 'd' anymore, it's OK to GC now. */
                event_gc_inotify_data(e, d);
                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached();
        }

        s->dispatching = false;

        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
        }

        return 0;
}
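
/* Illustrative sketch (not part of the original file): how a user of the public API might employ a
 * prepare callback, which event_prepare() above invokes once per iteration right before the loop goes
 * to sleep. The struct, handler, and field names here are hypothetical. A typical use is deciding at
 * the last moment whether an associated source should fire. */
#if 0
static int my_prepare(sd_event_source *s, void *userdata) {
        MyState *state = userdata;  /* hypothetical application state */

        /* Enable the defer source only if there is queued work, so the loop sleeps otherwise. */
        return sd_event_source_set_enabled(s, state->n_queued > 0 ? SD_EVENT_ONESHOT : SD_EVENT_OFF);
}

static int setup(sd_event *e, MyState *state) {
        sd_event_source *s;
        int r;

        r = sd_event_add_defer(e, &s, my_work_handler, state);  /* my_work_handler is hypothetical */
        if (r < 0)
                return r;

        return sd_event_source_set_prepare(s, my_prepare);
}
#endif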

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        assert(!p || p->type == SOURCE_EXIT);

        if (!p || event_source_is_offline(p)) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (event_source_is_offline(p))
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          usec_add(e->watchdog_last, (e->watchdog_period / 2)),
                          usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4)));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL));
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
         * this check here once, since gettid() is typically not cached, and we thus want to minimize
         * syscalls. */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
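
/* Illustrative sketch (not part of the original file): the low-level way to drive the loop, splitting
 * one iteration into the prepare/wait/dispatch phases implemented above. This mirrors what
 * sd_event_run() does internally; doing it manually is mostly useful when embedding sd-event in a
 * foreign event loop. */
#if 0
static int run_one_iteration(sd_event *e) {
        int r;

        r = sd_event_prepare(e);                   /* > 0 if events are already pending */
        if (r == 0)
                r = sd_event_wait(e, UINT64_MAX);  /* > 0 once something is pending */
        if (r > 0)
                r = sd_event_dispatch(e);          /* dispatch exactly one event source */

        return r;
}
#endif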

static int epoll_wait_usec(
                int fd,
                struct epoll_event *events,
                int maxevents,
                usec_t timeout) {

        int msec;
#if 0
        static bool epoll_pwait2_absent = false;
        int r;

        /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not.
         *
         * FIXME: this is temporarily disabled until epoll_pwait2() becomes more widely available.
         * See https://github.com/systemd/systemd/pull/18973 and
         * https://github.com/systemd/systemd/issues/19052. */

        if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
                r = epoll_pwait2(fd,
                                 events,
                                 maxevents,
                                 TIMESPEC_STORE(timeout),
                                 NULL);
                if (r >= 0)
                        return r;
                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                        return -errno; /* Only fall back to the old epoll_wait() if the syscall is masked
                                        * or not supported. */

                epoll_pwait2_absent = true;
        }
#endif

        if (timeout == USEC_INFINITY)
                msec = -1;
        else {
                usec_t k;

                k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
                if (k >= INT_MAX)
                        msec = INT_MAX; /* Saturate */
                else
                        msec = (int) k;
        }

        return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
}

static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
        size_t n_event_queue, m, n_event_max;
        int64_t min_priority = threshold;
        bool something_new = false;
        int r;

        assert(e);
        assert(ret_min_priority);

        n_event_queue = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
                return -ENOMEM;

        n_event_max = MALLOC_ELEMENTSOF(e->event_queue);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        for (;;) {
                r = epoll_wait_usec(
                                e->epoll_fd,
                                e->event_queue,
                                n_event_max,
                                timeout);
                if (r < 0)
                        return r;

                m = (size_t) r;

                if (m < n_event_max)
                        break;

                if (n_event_max >= n_event_queue * 10)
                        break;

                if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
                        return -ENOMEM;

                n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
                timeout = 0;
        }

        /* Set the timestamp only when this is called for the first time. */
        if (threshold == INT64_MAX)
                triple_timestamp_get(&e->timestamp);

        for (size_t i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
                else {
                        WakeupType *t = e->event_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                assert(s);

                                if (s->priority > threshold)
                                        continue;

                                min_priority = MIN(min_priority, s->priority);

                                switch (s->type) {

                                case SOURCE_IO:
                                        r = process_io(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_CHILD:
                                        r = process_pidfd(e, s, e->event_queue[i].events);
                                        break;

                                default:
                                        assert_not_reached();
                                }

                                break;
                        }

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                assert(d);

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
                                break;

                        default:
                                assert_not_reached();
                        }
                }
                if (r < 0)
                        return r;
                if (r > 0)
                        something_new = true;
        }

        *ret_min_priority = min_priority;
        return something_new;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        for (int64_t threshold = INT64_MAX; ; threshold--) {
                int64_t epoll_min_priority, child_min_priority;

                /* There is a possibility that new epoll (especially IO) and child events are triggered
                 * just after the process_epoll() call but before process_child(), and the new IO events
                 * may have a higher priority than the child events. To salvage these events, let's call
                 * epoll_wait() again, but accept only events with a higher priority than the previous
                 * ones. See issue https://github.com/systemd/systemd/issues/18190 and comments
                 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
                 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */

                r = process_epoll(e, timeout, threshold, &epoll_min_priority);
                if (r == -EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }
                if (r < 0)
                        goto finish;
                if (r == 0 && threshold < INT64_MAX)
                        /* No new epoll event. */
                        break;

                r = process_child(e, threshold, &child_min_priority);
                if (r < 0)
                        goto finish;
                if (r == 0)
                        /* No new child event. */
                        break;

                threshold = MIN(epoll_min_priority, child_min_priority);
                if (threshold == INT64_MIN)
                        break;

                timeout = 0;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;
        else if (r == 1) {
                /* The ratelimit expiry callback was called. Let's postpone processing pending sources and
                 * put the loop back into the initial state, so that the next iteration also evaluates
                 * sources that were potentially re-enabled by the callback.
                 *
                 * Wondering why we treat only this invocation of process_timer() differently? Once an
                 * event source is ratelimited we essentially transform it into a CLOCK_MONOTONIC timer,
                 * hence the ratelimit expiry callback is never called for any other timer type. */
                r = 0;
                goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run_usec != 0) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = log2u64(this_run - e->last_run_usec);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log_usec = this_run;
                }
        }

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run_usec = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
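
/* Illustrative sketch (not part of the original file): using sd_event_run() directly instead of
 * sd_event_loop(), e.g. to interleave event processing with other periodic work. A zero return means
 * the timeout (here 100ms, an arbitrary choice) elapsed without any event being dispatched. */
#if 0
for (;;) {
        r = sd_event_run(e, 100 * USEC_PER_MSEC);
        if (r < 0)
                break;          /* error */
        if (sd_event_get_state(e) == SD_EVENT_FINISHED)
                break;          /* sd_event_exit() was called */

        do_other_periodic_work();       /* hypothetical */
}
#endif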

_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, UINT64_MAX);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
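
/* Illustrative sketch (not part of the original file): the typical high-level usage of the loop, as
 * driven by sd_event_loop() above. on_signal is a hypothetical handler name; error handling is
 * abbreviated. */
#if 0
int main(void) {
        _cleanup_(sd_event_unrefp) sd_event *e = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return EXIT_FAILURE;

        /* Let SIGTERM terminate the loop cleanly; sd-event handles the signalfd details, but the
         * signal must be blocked first. */
        assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
        r = sd_event_add_signal(e, NULL, SIGTERM, on_signal, NULL);
        if (r < 0)
                return EXIT_FAILURE;

        r = sd_event_loop(e);   /* runs until sd_event_exit(), returns the exit code */
        return r < 0 ? EXIT_FAILURE : r;
}
#endif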

_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
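
/* Illustrative sketch (not part of the original file): pairing sd_event_exit() with an exit source.
 * Once an exit is requested, dispatch_exit() above runs SOURCE_EXIT sources before the loop finishes,
 * which makes them a natural place for cleanup. Names are hypothetical. */
#if 0
static int on_exit_cleanup(sd_event_source *s, void *userdata) {
        close_connections(userdata);    /* hypothetical cleanup helper */
        return 0;
}

/* During setup: */
r = sd_event_add_exit(e, NULL, on_exit_cleanup, state);

/* Later, from any handler: request termination, making sd_event_loop() return 0: */
r = sd_event_exit(e, 0);
#endif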

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
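
/* Illustrative sketch (not part of the original file): the main consumer of sd_event_now() is relative
 * timer scheduling. Using the loop's cached timestamp instead of a fresh clock_gettime() keeps timers
 * armed within the same iteration mutually consistent. on_timer is a hypothetical handler. */
#if 0
static int schedule_in(sd_event *e, uint64_t delay_usec, void *userdata) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        /* Fire once, 'delay_usec' from the loop's current time, with default accuracy (0). */
        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + delay_usec, 0, on_timer, userdata);
}
#endif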

_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
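
/* Illustrative sketch (not part of the original file): enabling the watchdog logic above from a
 * service. With WatchdogSec= set in the unit file, the service manager passes the interval to the
 * process, and the loop then pings it automatically from process_watchdog(), at roughly half to
 * three-quarters of the configured period as computed in arm_watchdog(). */
#if 0
r = sd_event_set_watchdog(e, true);
if (r < 0)
        return r;       /* setting up the timerfd or epoll registration failed */
if (r == 0)
        log_debug("Watchdog not requested by service manager, continuing without.");
#endif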

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
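
/* Illustrative sketch (not part of the original file): a destroy callback is the usual way to tie the
 * lifetime of a heap-allocated userdata object to its event source, particularly for sources the
 * caller never unrefs explicitly. Names are hypothetical. */
#if 0
static void free_request(void *userdata) {
        request_free(userdata);         /* hypothetical destructor */
}

r = sd_event_add_io(e, &s, fd, EPOLLIN, on_request_io, req);
if (r < 0)
        return r;

/* From now on 'req' is released automatically whenever the source is freed. */
r = sd_event_source_set_destroy_callback(s, free_request);
#endif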

_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}
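
/* Illustrative sketch (not part of the original file): a floating source inverts the reference
 * direction, as implemented above, so that the event loop keeps the source alive instead of the
 * caller. Handy for fire-and-forget sources the caller never needs to touch again. on_idle is a
 * hypothetical handler. */
#if 0
r = sd_event_add_defer(e, &s, on_idle, NULL);
if (r < 0)
        return r;

r = sd_event_source_set_floating(s, true);
if (r < 0)
        return r;

/* Drop our reference; the loop now owns the source for the rest of its lifetime. */
s = sd_event_source_unref(s);
#endif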

_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}

_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
        int r;

        assert_return(s, -EINVAL);

        /* Turning on ratelimiting on event source types that don't support it is a loggable offense.
         * Doing so is a programming error. */
        assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);

        /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
         * non-ratelimited. */
        r = event_source_leave_ratelimit(s, /* run_callback */ false);
        if (r < 0)
                return r;

        s->rate_limit = (RateLimit) { interval, burst };
        return 0;
}
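
/* Illustrative sketch (not part of the original file): ratelimiting an IO source so a flooding fd
 * cannot starve the rest of the loop. With the values below (arbitrary, for illustration) the source
 * is taken offline once it is dispatched more than 100 times within one second, and resumes once the
 * interval has elapsed. on_io is a hypothetical handler. */
#if 0
r = sd_event_add_io(e, &s, fd, EPOLLIN, on_io, NULL);
if (r < 0)
        return r;

r = sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 100);
if (r < 0)
        return r;
#endif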

_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
        assert_return(s, -EINVAL);

        s->ratelimit_expire_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
        assert_return(s, -EINVAL);

        /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
         * don't use assert_return(). Unlike turning on ratelimiting it's not really a programming error. */
        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return -EDOM;

        if (!ratelimit_configured(&s->rate_limit))
                return -ENOEXEC;

        if (ret_interval)
                *ret_interval = s->rate_limit.interval;
        if (ret_burst)
                *ret_burst = s->rate_limit.burst;

        return 0;
}

_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
        assert_return(s, -EINVAL);

        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return false;

        if (!ratelimit_configured(&s->rate_limit))
                return false;

        return s->ratelimited;
}