/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN on the pidfd */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static bool event_source_is_online(sd_event_source *s) {
        assert(s);
        return s->enabled != SD_EVENT_OFF && !s->ratelimited;
}

static bool event_source_is_offline(sd_event_source *s) {
        assert(s);
        return s->enabled == SD_EVENT_OFF || s->ratelimited;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t)                 \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM)

#define EVENT_SOURCE_CAN_RATE_LIMIT(t)          \
        IN_SET((t),                             \
               SOURCE_IO,                       \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM,      \
               SOURCE_SIGNAL,                   \
               SOURCE_DEFER,                    \
               SOURCE_INOTIFY)

/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
 * Time sources and ratelimited sources can be passed, so effectively this is the same as the
 * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We can add support for more clocks
         * when the kernel learns to deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_online_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run_usec, last_log_usec;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Move most recently prepared ones last, so that we can stop preparing as soon as we hit one
         * that has already been prepared in the current iteration. */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static usec_t time_event_source_next(const sd_event_source *s) {
        assert(s);

        /* We have two kinds of event sources that have an elapse time associated with them: the actual
         * time based ones and the ones for which a ratelimit can be in effect (where we want to be
         * notified once the ratelimit time window ends). Let's return the next elapse time depending on
         * what we are looking at here. */

        if (s->ratelimited) { /* If rate-limited, the next elapse time is when the ratelimit time window ends */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Otherwise this must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return s->time.next;

        return USEC_INFINITY;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        assert(s);

        if (s->ratelimited) { /* For ratelimited sources the earliest and the latest time shall actually
                               * be the same, as we should avoid adding additional inaccuracy on top of an
                               * already inaccurate time window */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return usec_add(s->time.next, s->time.accuracy);

        return USEC_INFINITY;
}

static bool event_source_timer_candidate(const sd_event_source *s) {
        assert(s);

        /* Returns true for event sources that either are not pending yet (i.e. where it's worth marking
         * them pending) or which are currently ratelimited (i.e. where it's worth leaving the ratelimited
         * state) */
        return !s->pending || s->ratelimited;
}

static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
        r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
        if (r != 0)
                return r;

        /* Order by time */
        return CMP(time_func(x), time_func(y));
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_next);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_latest);
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 … 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

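/* Illustrative usage sketch (not part of this file, kept disabled via #if 0):
 * the minimal lifecycle of an event loop as seen by a caller of this API.
 * sd_event_default() would return the per-thread default instance instead. */
#if 0
static int run_simple_loop(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* … attach sources with sd_event_add_io()/sd_event_add_time()/… here … */

        r = sd_event_loop(e); /* dispatches sources until sd_event_exit() is called */
        sd_event_unref(e);
        return r;
}
#endif
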
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

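/* Sketch: sd_event_source_disable_unref() is meant for destructors that must
 * both stop and release a source in one step. 'MyContext' is hypothetical. */
#if 0
static void my_context_free(MyContext *c) {
        c->io_source = sd_event_source_disable_unref(c->io_source);
        free(c);
}
#endif
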
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data object. If the signal mask of the object
         * becomes empty that way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_online_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            event_source_is_online(e->signal_sources[sig]))
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered
         * properly again. */

        if (s->ratelimited)
                d = &s->event->monotonic;
        else if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(d = event_get_clock_data(s->event, s->type));
        else
                return; /* no-op for an event source which is neither a timer nor ratelimited. */

        prioq_reshuffle(d->earliest, s, &s->earliest_index);
        prioq_reshuffle(d->latest, s, &s->latest_index);
        d->needs_rearm = true;
}

static void event_source_time_prioq_remove(
                sd_event_source *s,
                struct clock_data *d) {

        assert(s);
        assert(d);

        prioq_remove(d->earliest, s, &s->earliest_index);
        prioq_remove(d->latest, s, &s->latest_index);
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                /* Only remove this event source from the time event source here if it is not ratelimited. If
                 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
                 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */

                if (!s->ratelimited) {
                        struct clock_data *d;
                        assert_se(d = event_get_clock_data(s->event, s->type));
                        event_source_time_prioq_remove(s, d);
                }

                break;

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (event_source_is_online(s)) {
                                assert(s->event->n_online_child_sources > 0);
                                s->event->n_online_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached();
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        if (s->ratelimited)
                event_source_time_prioq_remove(s, &s->event->monotonic);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static sd_event_source* source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        return mfree(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 1;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

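/* Sketch (disabled, illustrative names): a typical sd_event_add_io() caller,
 * watching a non-blocking fd for input and stopping the loop on EOF/error. */
#if 0
static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        char buf[4096];
        ssize_t n;

        n = read(fd, buf, sizeof(buf));
        if (n <= 0) /* EOF or read error: leave the loop */
                return sd_event_exit(sd_event_source_get_event(s), n < 0 ? -errno : 0);

        /* … consume n bytes … */
        return 0;
}

/* r = sd_event_add_io(e, &source, fd, EPOLLIN, on_io, NULL); */
#endif
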
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to the same time within each
         * minute/second/250ms, so that events all across the system can be coalesced into a single CPU
         * wakeup. However, let's take some system-specific randomness for this value, so that in a
         * network of systems with synced clocks timer events are distributed a bit. Here, we calculate a
         * perturbation usec offset from the boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

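/* Sketch of how the perturbation offset is meant to be applied when coalescing
 * wakeups (the actual arming logic lives further down in this file, outside this
 * excerpt; this is an assumption-laden illustration, not the real code): round a
 * deadline to the per-system offset within the minute, so that machines with
 * synced clocks don't all wake at :00 exactly. */
#if 0
static usec_t align_wakeup_to_minute(sd_event *e, usec_t deadline) {
        usec_t c;

        initialize_perturb(e);

        c = (deadline / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c < deadline) /* rounded into the past? pick the slot in the next minute */
                c += USEC_PER_MINUTE;

        return c;
}
#endif
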
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        assert(d);

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}

static int event_source_time_prioq_put(
                sd_event_source *s,
                struct clock_data *d) {

        int r;

        assert(s);
        assert(d);
        assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

        r = prioq_put(d->earliest, s, &s->earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->latest_index);
        if (r < 0) {
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
                s->earliest_index = PRIOQ_IDX_NULL;
                return r;
        }

        d->needs_rearm = true;
        return 0;
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != UINT64_MAX, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        assert_se(d = event_get_clock_data(e, type));

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = event_source_time_prioq_put(s, d);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

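/* Sketch (disabled, illustrative): arming a one-shot timer ~5s from now on
 * CLOCK_MONOTONIC. Passing 0 as accuracy selects DEFAULT_ACCURACY_USEC (250ms,
 * see top of file); sd_event_add_time_relative() below folds the sd_event_now()
 * step into a single call. */
#if 0
static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
        /* … timer elapsed … */
        return 0;
}

static int arm_timer(sd_event *e) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
                                 now_usec + 5 * USEC_PER_SEC, 0,
                                 on_timer, NULL);
}
#endif
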
_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

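/* Sketch (disabled, illustrative): watching SIGTERM. The signal must be blocked
 * before the source is added, or the call fails with -EBUSY (see above); with a
 * NULL callback, delivery simply terminates the loop via signal_exit_callback(). */
#if 0
static int watch_sigterm(sd_event *e) {
        sigset_t ss;

        sigemptyset(&ss);
        sigaddset(&ss, SIGTERM);
        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
                return -errno;

        return sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
}
#endif
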
static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;

        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

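/* Sketch (disabled, illustrative): watching a forked-off child for exit.
 * SIGCHLD must be blocked first, even on pidfd-capable kernels (see the
 * -EBUSY check above). */
#if 0
static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* si->si_pid and si->si_status describe how the child exited */
        return 0;
}

static int watch_child(sd_event *e, pid_t pid) {
        sigset_t ss;

        sigemptyset(&ss);
        sigaddset(&ss, SIGCHLD);
        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
                return -errno;

        return sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
}
#endif
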
_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

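/* Sketch (disabled, illustrative): defer sources run on the next loop iteration,
 * before the loop waits again; created SD_EVENT_ONESHOT here, so the callback
 * fires once unless the source is re-enabled. */
#if 0
static int on_defer(sd_event_source *s, void *userdata) {
        /* … runs once, on the next iteration … */
        return 0;
}

/* r = sd_event_add_defer(e, NULL, on_defer, NULL); */
#endif
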
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

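/* Sketch (disabled, illustrative): exit sources are dispatched in priority order
 * once sd_event_exit() has been called, which makes them a natural hook for
 * cleanup work. */
#if 0
static int on_loop_exit(sd_event_source *s, void *userdata) {
        /* … flush buffers, close connections … */
        return 0;
}

/* r = sd_event_add_exit(e, NULL, on_loop_exit, NULL); */
#endif
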
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right at the moment we
                                 * enter the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's
                                 * a very likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inotify_data(
                sd_event *e,
                struct inotify_data *d) {

        assert(e);

        /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch
         * any inode with it anymore, which in turn happens if no event source of this priority is interested
         * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC
         * (under the expectation that the GC is called again once the counter is decremented). */

        if (!d)
                return;

        if (!hashmap_isempty(d->inodes))
                return;

        if (d->n_busy > 0)
                return;

        event_free_inotify_data(e, d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        event_gc_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

1920static uint32_t inode_data_determine_mask(struct inode_data *d) {
1921 bool excl_unlink = true;
1922 uint32_t combined = 0;
1923 sd_event_source *s;
1924
1925 assert(d);
1926
1927 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1928 * the IN_EXCL_UNLINK flag is ANDed instead.
1929 *
 1930 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1931 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
f21f31b2 1932 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
97ef5391
LP
1933 * events we don't care for client-side. */
1934
1935 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1936
1937 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1938 excl_unlink = false;
1939
1940 combined |= s->inotify.mask;
1941 }
1942
1943 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1944}
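
The merging rule above is easiest to see with concrete values. Below is a tiny standalone model of it (illustrative only, not part of sd-event): one watcher asks for IN_CREATE with IN_EXCL_UNLINK, another for IN_DELETE without it, so the combined watch keeps both event bits but drops IN_EXCL_UNLINK, since not every source requested it.

        #include <stdint.h>
        #include <stdio.h>
        #include <sys/inotify.h>

        int main(void) {
                uint32_t a = IN_CREATE | IN_EXCL_UNLINK; /* first event source's mask */
                uint32_t b = IN_DELETE;                  /* second source: no IN_EXCL_UNLINK */

                /* OR the event bits, AND the IN_EXCL_UNLINK flag, mirroring inode_data_determine_mask() */
                uint32_t combined = a | b;
                int excl_unlink = (a & IN_EXCL_UNLINK) && (b & IN_EXCL_UNLINK);

                combined = (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) |
                           (excl_unlink ? IN_EXCL_UNLINK : 0);

                printf("combined mask: %#x (IN_EXCL_UNLINK %s)\n",
                       (unsigned) combined, excl_unlink ? "kept" : "dropped");
                return 0;
        }
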
1945
1946static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1947 uint32_t combined_mask;
1948 int wd, r;
1949
1950 assert(d);
1951 assert(d->fd >= 0);
1952
1953 combined_mask = inode_data_determine_mask(d);
1954
1955 if (d->wd >= 0 && combined_mask == d->combined_mask)
1956 return 0;
1957
1958 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1959 if (r < 0)
1960 return r;
1961
1962 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1963 if (wd < 0)
1964 return -errno;
1965
1966 if (d->wd < 0) {
1967 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1968 if (r < 0) {
1969 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1970 return r;
1971 }
1972
1973 d->wd = wd;
1974
1975 } else if (d->wd != wd) {
1976
1977 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
 1978 (void) inotify_rm_watch(d->inotify_data->fd, wd); /* the inotify fd, not the inode's O_PATH fd */
1979 return -EINVAL;
1980 }
1981
1982 d->combined_mask = combined_mask;
1983 return 1;
1984}
1985
b9350e70
LP
1986static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
1987 assert(s);
1988
1989 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1990}
1991
e67d738a 1992static int event_add_inotify_fd_internal(
97ef5391
LP
1993 sd_event *e,
1994 sd_event_source **ret,
e67d738a
LP
1995 int fd,
1996 bool donate,
97ef5391
LP
1997 uint32_t mask,
1998 sd_event_inotify_handler_t callback,
1999 void *userdata) {
2000
e67d738a
LP
2001 _cleanup_close_ int donated_fd = donate ? fd : -1;
2002 _cleanup_(source_freep) sd_event_source *s = NULL;
97ef5391
LP
2003 struct inotify_data *inotify_data = NULL;
2004 struct inode_data *inode_data = NULL;
97ef5391
LP
2005 struct stat st;
2006 int r;
2007
2008 assert_return(e, -EINVAL);
2009 assert_return(e = event_resolve(e), -ENOPKG);
e67d738a 2010 assert_return(fd >= 0, -EBADF);
97ef5391
LP
2011 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2012 assert_return(!event_pid_changed(e), -ECHILD);
2013
b9350e70
LP
2014 if (!callback)
2015 callback = inotify_exit_callback;
2016
97ef5391
LP
2017 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
2018 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
2019 * the user can't use them for us. */
2020 if (mask & IN_MASK_ADD)
2021 return -EINVAL;
2022
97ef5391
LP
2023 if (fstat(fd, &st) < 0)
2024 return -errno;
2025
2026 s = source_new(e, !ret, SOURCE_INOTIFY);
2027 if (!s)
2028 return -ENOMEM;
2029
2030 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
2031 s->inotify.mask = mask;
2032 s->inotify.callback = callback;
2033 s->userdata = userdata;
2034
2035 /* Allocate an inotify object for this priority, and an inode object within it */
2036 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
2037 if (r < 0)
8c75fe17 2038 return r;
97ef5391
LP
2039
2040 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
8c75fe17 2041 if (r < 0) {
e67d738a 2042 event_gc_inotify_data(e, inotify_data);
8c75fe17
ZJS
2043 return r;
2044 }
97ef5391
LP
2045
 2046 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
 2047 * the event source until then; for that we need the original inode. */
2048 if (inode_data->fd < 0) {
e67d738a
LP
2049 if (donated_fd >= 0)
2050 inode_data->fd = TAKE_FD(donated_fd);
2051 else {
2052 inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2053 if (inode_data->fd < 0) {
2054 r = -errno;
2055 event_gc_inode_data(e, inode_data);
2056 return r;
2057 }
2058 }
2059
97ef5391
LP
2060 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
2061 }
2062
2063 /* Link our event source to the inode data object */
2064 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
2065 s->inotify.inode_data = inode_data;
2066
97ef5391
LP
2067 /* Actually realize the watch now */
2068 r = inode_data_realize_watch(e, inode_data);
2069 if (r < 0)
8c75fe17 2070 return r;
97ef5391 2071
97ef5391
LP
2072 if (ret)
2073 *ret = s;
8c75fe17 2074 TAKE_PTR(s);
97ef5391
LP
2075
2076 return 0;
97ef5391
LP
2077}
2078
e67d738a
LP
2079_public_ int sd_event_add_inotify_fd(
2080 sd_event *e,
2081 sd_event_source **ret,
2082 int fd,
2083 uint32_t mask,
2084 sd_event_inotify_handler_t callback,
2085 void *userdata) {
2086
2087 return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
2088}
2089
2090_public_ int sd_event_add_inotify(
2091 sd_event *e,
2092 sd_event_source **ret,
2093 const char *path,
2094 uint32_t mask,
2095 sd_event_inotify_handler_t callback,
2096 void *userdata) {
2097
2098 sd_event_source *s;
2099 int fd, r;
2100
2101 assert_return(path, -EINVAL);
2102
2103 fd = open(path, O_PATH|O_CLOEXEC|
2104 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
2105 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
2106 if (fd < 0)
2107 return -errno;
2108
2109 r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);
2110 if (r < 0)
2111 return r;
2112
2113 (void) sd_event_source_set_description(s, path);
2114
2115 if (ret)
2116 *ret = s;
2117
2118 return r;
2119}
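
For illustration, a minimal caller of sd_event_add_inotify() might look like the sketch below; it uses only the public sd-event API, and the watched path, mask and handler are made up for the example. Passing NULL for the source pointer makes the source "floating", i.e. owned by the event loop itself.

        #include <stdio.h>
        #include <sys/inotify.h>
        #include <systemd/sd-event.h>

        static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
                printf("mask %#x on %s\n", (unsigned) ev->mask, ev->len > 0 ? ev->name : "(inode itself)");
                return 0;
        }

        int main(void) {
                sd_event *e = NULL;
                int r;

                r = sd_event_default(&e);
                if (r < 0)
                        return 1;

                r = sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE|IN_DELETE, on_inotify, NULL);
                if (r >= 0)
                        r = sd_event_loop(e);

                sd_event_unref(e);
                return r < 0;
        }
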
2120
8301aa0b 2121static sd_event_source* event_source_free(sd_event_source *s) {
6680dd6b
LP
2122 if (!s)
2123 return NULL;
da7e457c 2124
8301aa0b
YW
2125 /* Here's a special hack: when we are called from a
2126 * dispatch handler we won't free the event source
2127 * immediately, but we will detach the fd from the
2128 * epoll. This way it is safe for the caller to unref
2129 * the event source and immediately close the fd, but
2130 * we still retain a valid event source object after
2131 * the callback. */
fd38203a 2132
8301aa0b
YW
2133 if (s->dispatching) {
2134 if (s->type == SOURCE_IO)
2135 source_io_unregister(s);
fd38203a 2136
8301aa0b
YW
2137 source_disconnect(s);
2138 } else
2139 source_free(s);
fd38203a
LP
2140
2141 return NULL;
2142}
2143
8301aa0b
YW
2144DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
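
One practical consequence of the deferred free above, shown as a sketch (the handler is hypothetical): an I/O callback may drop the last reference to its own source and then close the watched fd, because the fd has already been detached from the epoll by the time the unref returns.

        #include <stdint.h>
        #include <unistd.h>
        #include <systemd/sd-event.h>

        static int on_ready_once(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
                char buf[256];

                (void) read(fd, buf, sizeof(buf)); /* drain whatever is pending */

                sd_event_source_unref(s); /* we are dispatching: this only detaches the source */
                close(fd);                /* safe: the fd is already off the epoll */
                return 0;
        }
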
2145
356779df 2146_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
f7f53e9e 2147 assert_return(s, -EINVAL);
f4b2933e 2148 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 2149
356779df 2150 return free_and_strdup(&s->description, description);
f7f53e9e
TG
2151}
2152
356779df 2153_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
f7f53e9e 2154 assert_return(s, -EINVAL);
356779df 2155 assert_return(description, -EINVAL);
f4b2933e 2156 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 2157
7d92a1a4
ZJS
2158 if (!s->description)
2159 return -ENXIO;
2160
356779df 2161 *description = s->description;
f7f53e9e
TG
2162 return 0;
2163}
2164
adcc4ca3 2165_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
305f78bf 2166 assert_return(s, NULL);
eaa3cbef
LP
2167
2168 return s->event;
2169}
2170
f7262a9f 2171_public_ int sd_event_source_get_pending(sd_event_source *s) {
305f78bf 2172 assert_return(s, -EINVAL);
6203e07a 2173 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c 2174 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2175 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2176
2177 return s->pending;
2178}
2179
f7262a9f 2180_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
305f78bf
LP
2181 assert_return(s, -EINVAL);
2182 assert_return(s->type == SOURCE_IO, -EDOM);
2183 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2184
2185 return s->io.fd;
2186}
2187
30caf8f3
LP
2188_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2189 int r;
2190
2191 assert_return(s, -EINVAL);
8ac43fee 2192 assert_return(fd >= 0, -EBADF);
30caf8f3
LP
2193 assert_return(s->type == SOURCE_IO, -EDOM);
2194 assert_return(!event_pid_changed(s->event), -ECHILD);
2195
2196 if (s->io.fd == fd)
2197 return 0;
2198
b6d5481b 2199 if (event_source_is_offline(s)) {
30caf8f3
LP
2200 s->io.fd = fd;
2201 s->io.registered = false;
2202 } else {
2203 int saved_fd;
2204
2205 saved_fd = s->io.fd;
2206 assert(s->io.registered);
2207
2208 s->io.fd = fd;
2209 s->io.registered = false;
2210
2211 r = source_io_register(s, s->enabled, s->io.events);
2212 if (r < 0) {
2213 s->io.fd = saved_fd;
2214 s->io.registered = true;
2215 return r;
2216 }
2217
5a795bff 2218 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
30caf8f3
LP
2219 }
2220
2221 return 0;
2222}
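
A sketch of the intended use, under the assumption that the source does not own its fd (see sd_event_source_set_io_fd_own() below): after reconnecting, point the existing source at the new fd and close the old one; on failure the old registration is restored, so the caller keeps a consistent state either way.

        #include <unistd.h>
        #include <systemd/sd-event.h>

        static int swap_connection_fd(sd_event_source *s, int new_fd) {
                int old_fd, r;

                old_fd = sd_event_source_get_io_fd(s);

                r = sd_event_source_set_io_fd(s, new_fd);
                if (r < 0)
                        return r; /* the source still watches old_fd */

                close(old_fd); /* the source no longer references it */
                return 0;
        }
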
2223
ab93297c
NM
2224_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2225 assert_return(s, -EINVAL);
2226 assert_return(s->type == SOURCE_IO, -EDOM);
2227
2228 return s->io.owned;
2229}
2230
2231_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2232 assert_return(s, -EINVAL);
2233 assert_return(s->type == SOURCE_IO, -EDOM);
2234
2235 s->io.owned = own;
2236 return 0;
2237}
2238
f7262a9f 2239_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
305f78bf
LP
2240 assert_return(s, -EINVAL);
2241 assert_return(events, -EINVAL);
2242 assert_return(s->type == SOURCE_IO, -EDOM);
2243 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2244
2245 *events = s->io.events;
2246 return 0;
2247}
2248
f7262a9f 2249_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
fd38203a
LP
2250 int r;
2251
305f78bf
LP
2252 assert_return(s, -EINVAL);
2253 assert_return(s->type == SOURCE_IO, -EDOM);
2a16a986 2254 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
da7e457c 2255 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2256 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2257
b63c8d4f
DH
2258 /* edge-triggered updates are never skipped, so we can reset edges */
2259 if (s->io.events == events && !(events & EPOLLET))
fd38203a
LP
2260 return 0;
2261
2a0dc6cd
LP
2262 r = source_set_pending(s, false);
2263 if (r < 0)
2264 return r;
2265
b6d5481b 2266 if (event_source_is_online(s)) {
e4715127 2267 r = source_io_register(s, s->enabled, events);
fd38203a
LP
2268 if (r < 0)
2269 return r;
2270 }
2271
2272 s->io.events = events;
2273
2274 return 0;
2275}
2276
f7262a9f 2277_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
305f78bf
LP
2278 assert_return(s, -EINVAL);
2279 assert_return(revents, -EINVAL);
2280 assert_return(s->type == SOURCE_IO, -EDOM);
2281 assert_return(s->pending, -ENODATA);
2282 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2283
2284 *revents = s->io.revents;
2285 return 0;
2286}
2287
f7262a9f 2288_public_ int sd_event_source_get_signal(sd_event_source *s) {
305f78bf
LP
2289 assert_return(s, -EINVAL);
2290 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2291 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2292
2293 return s->signal.sig;
2294}
2295
31927c16 2296_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
305f78bf
LP
2297 assert_return(s, -EINVAL);
2298 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2299
6680b8d1
ME
2300 *priority = s->priority;
2301 return 0;
fd38203a
LP
2302}
2303
31927c16 2304_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
97ef5391
LP
2305 bool rm_inotify = false, rm_inode = false;
2306 struct inotify_data *new_inotify_data = NULL;
2307 struct inode_data *new_inode_data = NULL;
9da4cb2b
LP
2308 int r;
2309
305f78bf 2310 assert_return(s, -EINVAL);
da7e457c 2311 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2312 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2313
2314 if (s->priority == priority)
2315 return 0;
2316
97ef5391
LP
2317 if (s->type == SOURCE_INOTIFY) {
2318 struct inode_data *old_inode_data;
2319
2320 assert(s->inotify.inode_data);
2321 old_inode_data = s->inotify.inode_data;
2322
 2323 /* We need the original fd to change the priority. If we don't have it we can't change the priority
 2324 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2325 * events we allow priority changes only until the first following iteration. */
2326 if (old_inode_data->fd < 0)
2327 return -EOPNOTSUPP;
2328
2329 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2330 if (r < 0)
2331 return r;
2332 rm_inotify = r > 0;
2333
2334 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2335 if (r < 0)
2336 goto fail;
2337 rm_inode = r > 0;
2338
2339 if (new_inode_data->fd < 0) {
2340 /* Duplicate the fd for the new inode object if we don't have any yet */
2341 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2342 if (new_inode_data->fd < 0) {
2343 r = -errno;
2344 goto fail;
2345 }
2346
2347 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2348 }
2349
2350 /* Move the event source to the new inode data structure */
2351 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2352 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2353 s->inotify.inode_data = new_inode_data;
2354
2355 /* Now create the new watch */
2356 r = inode_data_realize_watch(s->event, new_inode_data);
2357 if (r < 0) {
2358 /* Move it back */
2359 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2360 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2361 s->inotify.inode_data = old_inode_data;
2362 goto fail;
2363 }
2364
2365 s->priority = priority;
2366
2367 event_gc_inode_data(s->event, old_inode_data);
2368
b6d5481b 2369 } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
9da4cb2b
LP
2370 struct signal_data *old, *d;
2371
2372 /* Move us from the signalfd belonging to the old
2373 * priority to the signalfd of the new priority */
2374
2375 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2376
2377 s->priority = priority;
2378
2379 r = event_make_signal_data(s->event, s->signal.sig, &d);
2380 if (r < 0) {
2381 s->priority = old->priority;
2382 return r;
2383 }
2384
2385 event_unmask_signal_data(s->event, old, s->signal.sig);
2386 } else
2387 s->priority = priority;
fd38203a 2388
e1951c16 2389 event_source_pp_prioq_reshuffle(s);
fd38203a 2390
6203e07a
LP
2391 if (s->type == SOURCE_EXIT)
2392 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf 2393
fd38203a 2394 return 0;
97ef5391
LP
2395
2396fail:
2397 if (rm_inode)
2398 event_free_inode_data(s->event, new_inode_data);
2399
2400 if (rm_inotify)
2401 event_free_inotify_data(s->event, new_inotify_data);
2402
2403 return r;
fd38203a
LP
2404}
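
The inotify restriction above is worth spelling out with a sketch: a priority change has to happen right after the source is created, before the first event-loop iteration closes the O_PATH fd. Path and mask are illustrative; the NULL callback falls back to the exit-on-event default installed earlier.

        #include <sys/inotify.h>
        #include <systemd/sd-event.h>

        static int add_important_watch(sd_event *e) {
                sd_event_source *s = NULL;
                int r;

                r = sd_event_add_inotify(e, &s, "/tmp", IN_CREATE, NULL, NULL);
                if (r < 0)
                        return r;

                /* Must run before the first sd_event_run()/sd_event_loop() iteration;
                 * afterwards this returns -EOPNOTSUPP for inotify sources. */
                return sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IMPORTANT);
        }
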
2405
cad143a8 2406_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
305f78bf 2407 assert_return(s, -EINVAL);
305f78bf 2408 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2409
cad143a8
LP
2410 if (ret)
2411 *ret = s->enabled;
2412
08c1eb0e 2413 return s->enabled != SD_EVENT_OFF;
fd38203a
LP
2414}
2415
b6d5481b
LP
2416static int event_source_offline(
2417 sd_event_source *s,
2418 int enabled,
2419 bool ratelimited) {
2420
2421 bool was_offline;
fd38203a
LP
2422 int r;
2423
ddfde737 2424 assert(s);
b6d5481b 2425 assert(enabled == SD_EVENT_OFF || ratelimited);
fd38203a 2426
ddfde737 2427 /* Unset the pending flag when this event source is disabled */
b6d5481b
LP
2428 if (s->enabled != SD_EVENT_OFF &&
2429 enabled == SD_EVENT_OFF &&
2430 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
ddfde737
LP
2431 r = source_set_pending(s, false);
2432 if (r < 0)
2433 return r;
2434 }
cc567911 2435
b6d5481b
LP
2436 was_offline = event_source_is_offline(s);
2437 s->enabled = enabled;
2438 s->ratelimited = ratelimited;
fd38203a 2439
ddfde737 2440 switch (s->type) {
fd38203a 2441
ddfde737
LP
2442 case SOURCE_IO:
2443 source_io_unregister(s);
2444 break;
ac989a78 2445
ddfde737
LP
2446 case SOURCE_SIGNAL:
2447 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2448 break;
fd38203a 2449
ddfde737 2450 case SOURCE_CHILD:
b6d5481b
LP
2451 if (!was_offline) {
2452 assert(s->event->n_online_child_sources > 0);
2453 s->event->n_online_child_sources--;
2454 }
fd38203a 2455
ddfde737
LP
2456 if (EVENT_SOURCE_WATCH_PIDFD(s))
2457 source_child_pidfd_unregister(s);
2458 else
2459 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2460 break;
4807d2d0 2461
ddfde737
LP
2462 case SOURCE_EXIT:
2463 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2464 break;
fd38203a 2465
2115b9b6
YW
2466 case SOURCE_TIME_REALTIME:
2467 case SOURCE_TIME_BOOTTIME:
2468 case SOURCE_TIME_MONOTONIC:
2469 case SOURCE_TIME_REALTIME_ALARM:
2470 case SOURCE_TIME_BOOTTIME_ALARM:
ddfde737
LP
2471 case SOURCE_DEFER:
2472 case SOURCE_POST:
2473 case SOURCE_INOTIFY:
2474 break;
fd38203a 2475
ddfde737 2476 default:
04499a70 2477 assert_not_reached();
ddfde737 2478 }
fd38203a 2479
2115b9b6
YW
2480 /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
2481 event_source_time_prioq_reshuffle(s);
2482
b6d5481b 2483 return 1;
ddfde737 2484}
f8f3f926 2485
b6d5481b
LP
2486static int event_source_online(
2487 sd_event_source *s,
2488 int enabled,
2489 bool ratelimited) {
2490
2491 bool was_online;
ddfde737 2492 int r;
fd38203a 2493
ddfde737 2494 assert(s);
b6d5481b 2495 assert(enabled != SD_EVENT_OFF || !ratelimited);
305f78bf 2496
ddfde737 2497 /* Unset the pending flag when this event source is enabled */
b6d5481b
LP
2498 if (s->enabled == SD_EVENT_OFF &&
2499 enabled != SD_EVENT_OFF &&
2500 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
ddfde737
LP
2501 r = source_set_pending(s, false);
2502 if (r < 0)
2503 return r;
2504 }
9d3e3aa5 2505
b6d5481b
LP
2506 /* Are we really ready for onlining? */
2507 if (enabled == SD_EVENT_OFF || ratelimited) {
 2508 /* Nope, we are not ready for onlining; in that case just update the precise state and exit */
2509 s->enabled = enabled;
2510 s->ratelimited = ratelimited;
2511 return 0;
2512 }
2513
2514 was_online = event_source_is_online(s);
2515
ddfde737 2516 switch (s->type) {
ddfde737 2517 case SOURCE_IO:
b6d5481b 2518 r = source_io_register(s, enabled, s->io.events);
d2eafe61 2519 if (r < 0)
ddfde737 2520 return r;
ddfde737 2521 break;
fd38203a 2522
ddfde737
LP
2523 case SOURCE_SIGNAL:
2524 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2525 if (r < 0) {
ddfde737
LP
2526 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2527 return r;
2528 }
fd38203a 2529
ddfde737 2530 break;
fd38203a 2531
ddfde737 2532 case SOURCE_CHILD:
ddfde737
LP
2533 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
 2534 /* yes, we have a pidfd */
9da4cb2b 2535
b6d5481b 2536 r = source_child_pidfd_register(s, enabled);
ac9f2640 2537 if (r < 0)
9da4cb2b 2538 return r;
ddfde737
LP
2539 } else {
 2540 /* no pidfd, or something other than WEXITED to watch for */
9da4cb2b 2541
ddfde737
LP
2542 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2543 if (r < 0) {
ddfde737
LP
2544 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2545 return r;
2546 }
2547 }
fd38203a 2548
b6d5481b
LP
2549 if (!was_online)
2550 s->event->n_online_child_sources++;
ddfde737 2551 break;
4807d2d0 2552
d2eafe61
ZJS
2553 case SOURCE_TIME_REALTIME:
2554 case SOURCE_TIME_BOOTTIME:
2555 case SOURCE_TIME_MONOTONIC:
2556 case SOURCE_TIME_REALTIME_ALARM:
2557 case SOURCE_TIME_BOOTTIME_ALARM:
ddfde737 2558 case SOURCE_EXIT:
ddfde737
LP
2559 case SOURCE_DEFER:
2560 case SOURCE_POST:
2561 case SOURCE_INOTIFY:
2562 break;
9da4cb2b 2563
ddfde737 2564 default:
04499a70 2565 assert_not_reached();
ddfde737 2566 }
f8f3f926 2567
b6d5481b
LP
2568 s->enabled = enabled;
2569 s->ratelimited = ratelimited;
d2eafe61
ZJS
2570
2571 /* Non-failing operations below */
2115b9b6 2572 if (s->type == SOURCE_EXIT)
d2eafe61 2573 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
d2eafe61 2574
2115b9b6
YW
2575 /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
2576 event_source_time_prioq_reshuffle(s);
d2eafe61 2577
b6d5481b 2578 return 1;
ddfde737
LP
2579}
2580
2581_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2582 int r;
9da4cb2b 2583
ddfde737
LP
2584 assert_return(s, -EINVAL);
2585 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2586 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2587
ddfde737
LP
2588 /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
2589 if (s->event->state == SD_EVENT_FINISHED)
2590 return m == SD_EVENT_OFF ? 0 : -ESTALE;
305f78bf 2591
ddfde737
LP
2592 if (s->enabled == m) /* No change? */
2593 return 0;
9d3e3aa5 2594
ddfde737 2595 if (m == SD_EVENT_OFF)
b6d5481b 2596 r = event_source_offline(s, m, s->ratelimited);
ddfde737
LP
2597 else {
2598 if (s->enabled != SD_EVENT_OFF) {
2599 /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
2600 * event source is already enabled after all. */
2601 s->enabled = m;
2602 return 0;
fd38203a 2603 }
ddfde737 2604
b6d5481b 2605 r = event_source_online(s, m, s->ratelimited);
fd38203a 2606 }
ddfde737
LP
2607 if (r < 0)
2608 return r;
fd38203a 2609
e1951c16 2610 event_source_pp_prioq_reshuffle(s);
fd38203a
LP
2611 return 0;
2612}
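
A small sketch of the typical on/off pattern (the blocking helper is hypothetical): take a source offline around an operation that would otherwise race with its callback, then put it back online.

        #include <systemd/sd-event.h>

        static int with_source_paused(sd_event_source *s, int (*op)(void *), void *arg) {
                int r;

                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;

                r = op(arg); /* hypothetical blocking operation */

                (void) sd_event_source_set_enabled(s, SD_EVENT_ON);
                return r;
        }
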
2613
f7262a9f 2614_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
305f78bf
LP
2615 assert_return(s, -EINVAL);
2616 assert_return(usec, -EINVAL);
6a0f1f6d 2617 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf 2618 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2619
2620 *usec = s->time.next;
2621 return 0;
2622}
2623
f7262a9f 2624_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2a0dc6cd 2625 int r;
6a0f1f6d 2626
305f78bf 2627 assert_return(s, -EINVAL);
6a0f1f6d 2628 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2629 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2630 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2631
2a0dc6cd
LP
2632 r = source_set_pending(s, false);
2633 if (r < 0)
2634 return r;
2576a19e 2635
2a0dc6cd 2636 s->time.next = usec;
fd38203a 2637
e1951c16 2638 event_source_time_prioq_reshuffle(s);
fd38203a
LP
2639 return 0;
2640}
2641
d6a83dc4
LP
2642_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2643 usec_t t;
2644 int r;
2645
2646 assert_return(s, -EINVAL);
2647 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2648
2649 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2650 if (r < 0)
2651 return r;
2652
496db330
YW
2653 usec = usec_add(t, usec);
2654 if (usec == USEC_INFINITY)
d6a83dc4
LP
2655 return -EOVERFLOW;
2656
496db330 2657 return sd_event_source_set_time(s, usec);
d6a83dc4
LP
2658}
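
This helper enables the classic re-arming idiom for periodic work, sketched below: a timer callback pushes its own deadline one second past the current time and re-enables itself as ONESHOT. Such a handler would be registered with sd_event_add_time().

        #include <stdint.h>
        #include <systemd/sd-event.h>

        static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
                int r;

                /* ... periodic work goes here ... */

                r = sd_event_source_set_time_relative(s, 1000000ULL); /* 1s, in µs */
                if (r < 0)
                        return r;

                return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
        }
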
2659
f7262a9f 2660_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
305f78bf
LP
2661 assert_return(s, -EINVAL);
2662 assert_return(usec, -EINVAL);
6a0f1f6d 2663 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf
LP
2664 assert_return(!event_pid_changed(s->event), -ECHILD);
2665
2666 *usec = s->time.accuracy;
2667 return 0;
2668}
2669
f7262a9f 2670_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2a0dc6cd 2671 int r;
6a0f1f6d 2672
305f78bf 2673 assert_return(s, -EINVAL);
f5fbe71d 2674 assert_return(usec != UINT64_MAX, -EINVAL);
6a0f1f6d 2675 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2676 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2677 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2678
2a0dc6cd
LP
2679 r = source_set_pending(s, false);
2680 if (r < 0)
2681 return r;
2682
eaa3cbef
LP
2683 if (usec == 0)
2684 usec = DEFAULT_ACCURACY_USEC;
2685
eaa3cbef
LP
2686 s->time.accuracy = usec;
2687
e1951c16 2688 event_source_time_prioq_reshuffle(s);
6a0f1f6d
LP
2689 return 0;
2690}
2691
2692_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2693 assert_return(s, -EINVAL);
2694 assert_return(clock, -EINVAL);
2695 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2696 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2697
6a0f1f6d 2698 *clock = event_source_type_to_clock(s->type);
eaa3cbef
LP
2699 return 0;
2700}
2701
f7262a9f 2702_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
4bee8012
LP
2703 assert_return(s, -EINVAL);
2704 assert_return(pid, -EINVAL);
2705 assert_return(s->type == SOURCE_CHILD, -EDOM);
2706 assert_return(!event_pid_changed(s->event), -ECHILD);
2707
2708 *pid = s->child.pid;
2709 return 0;
2710}
2711
f8f3f926
LP
2712_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2713 assert_return(s, -EINVAL);
2714 assert_return(s->type == SOURCE_CHILD, -EDOM);
2715 assert_return(!event_pid_changed(s->event), -ECHILD);
2716
2717 if (s->child.pidfd < 0)
2718 return -EOPNOTSUPP;
2719
2720 return s->child.pidfd;
2721}
2722
2723_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2724 assert_return(s, -EINVAL);
2725 assert_return(s->type == SOURCE_CHILD, -EDOM);
2726 assert_return(!event_pid_changed(s->event), -ECHILD);
2727 assert_return(SIGNAL_VALID(sig), -EINVAL);
2728
 2729 /* If we have already seen an indication that the process exited, refuse sending a signal early. This way we
2730 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2731 * available. */
2732 if (s->child.exited)
2733 return -ESRCH;
2734
2735 if (s->child.pidfd >= 0) {
2736 siginfo_t copy;
2737
2738 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2739 * structure here */
2740 if (si)
2741 copy = *si;
2742
2743 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2744 /* Let's propagate the error only if the system call is not implemented or prohibited */
2745 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2746 return -errno;
2747 } else
2748 return 0;
2749 }
2750
2751 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2752 * this here. */
2753 if (flags != 0)
2754 return -EOPNOTSUPP;
2755
2756 if (si) {
2757 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2758 siginfo_t copy = *si;
2759
2760 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2761 return -errno;
2762 } else if (kill(s->child.pid, sig) < 0)
2763 return -errno;
2764
2765 return 0;
2766}
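
Usage sketch: asking a tracked child to terminate, treating -ESRCH as "already exited" rather than as an error, which is exactly the PID-reuse protection implemented above.

        #include <errno.h>
        #include <signal.h>
        #include <systemd/sd-event.h>

        static int request_child_exit(sd_event_source *child_source) {
                int r;

                r = sd_event_source_send_child_signal(child_source, SIGTERM, NULL, 0);
                if (r == -ESRCH)
                        return 0; /* child already exited; nothing left to signal */

                return r;
        }
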
2767
2768_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2769 assert_return(s, -EINVAL);
2770 assert_return(s->type == SOURCE_CHILD, -EDOM);
2771
2772 if (s->child.pidfd < 0)
2773 return -EOPNOTSUPP;
2774
2775 return s->child.pidfd_owned;
2776}
2777
2778_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2779 assert_return(s, -EINVAL);
2780 assert_return(s->type == SOURCE_CHILD, -EDOM);
2781
2782 if (s->child.pidfd < 0)
2783 return -EOPNOTSUPP;
2784
2785 s->child.pidfd_owned = own;
2786 return 0;
2787}
2788
2789_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2790 assert_return(s, -EINVAL);
2791 assert_return(s->type == SOURCE_CHILD, -EDOM);
2792
2793 return s->child.process_owned;
2794}
2795
2796_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2797 assert_return(s, -EINVAL);
2798 assert_return(s->type == SOURCE_CHILD, -EDOM);
2799
2800 s->child.process_owned = own;
2801 return 0;
2802}
2803
97ef5391
LP
2804_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2805 assert_return(s, -EINVAL);
2806 assert_return(mask, -EINVAL);
2807 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2808 assert_return(!event_pid_changed(s->event), -ECHILD);
2809
2810 *mask = s->inotify.mask;
2811 return 0;
2812}
2813
718db961 2814_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
fd38203a
LP
2815 int r;
2816
da7e457c 2817 assert_return(s, -EINVAL);
6203e07a 2818 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c
LP
2819 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2820 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2821
2822 if (s->prepare == callback)
2823 return 0;
2824
2825 if (callback && s->prepare) {
2826 s->prepare = callback;
2827 return 0;
2828 }
2829
2830 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2831 if (r < 0)
2832 return r;
2833
2834 s->prepare = callback;
2835
2836 if (callback) {
2837 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2838 if (r < 0)
2839 return r;
2840 } else
2841 prioq_remove(s->event->prepare, s, &s->prepare_index);
2842
2843 return 0;
2844}
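
A prepare callback runs just before the loop goes to sleep, which makes it a natural place to derive a source's enablement from application state. A sketch, with a hypothetical work-queue type, installed via sd_event_source_set_prepare(s, prepare_flush):

        #include <systemd/sd-event.h>

        struct work_queue {
                int dirty; /* hypothetical application state */
        };

        static int prepare_flush(sd_event_source *s, void *userdata) {
                struct work_queue *q = userdata;

                /* Enable the (e.g. defer) source only if there is work to flush. */
                return sd_event_source_set_enabled(s, q->dirty ? SD_EVENT_ONESHOT : SD_EVENT_OFF);
        }
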
2845
f7262a9f 2846_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
da7e457c 2847 assert_return(s, NULL);
fd38203a
LP
2848
2849 return s->userdata;
2850}
2851
8f726607
LP
2852_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2853 void *ret;
2854
2855 assert_return(s, NULL);
2856
2857 ret = s->userdata;
2858 s->userdata = userdata;
2859
2860 return ret;
2861}
2862
b6d5481b
LP
2863static int event_source_enter_ratelimited(sd_event_source *s) {
2864 int r;
2865
2866 assert(s);
2867
2868 /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
2869 * the end of the rate limit time window, much as if it was a timer event source. */
2870
2871 if (s->ratelimited)
2872 return 0; /* Already ratelimited, this is a NOP hence */
2873
2874 /* Make sure we can install a CLOCK_MONOTONIC event further down. */
2875 r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
2876 if (r < 0)
2877 return r;
2878
2879 /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
2880 * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
2881 * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
2882 if (EVENT_SOURCE_IS_TIME(s->type))
2883 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2884
2885 /* Now, let's add the event source to the monotonic clock instead */
2886 r = event_source_time_prioq_put(s, &s->event->monotonic);
2887 if (r < 0)
2888 goto fail;
2889
2890 /* And let's take the event source officially offline */
2891 r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
2892 if (r < 0) {
2893 event_source_time_prioq_remove(s, &s->event->monotonic);
2894 goto fail;
2895 }
2896
2897 event_source_pp_prioq_reshuffle(s);
2898
2899 log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
2900 return 0;
2901
2902fail:
2903 /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
2904 * space for it should already be allocated. */
2905 if (EVENT_SOURCE_IS_TIME(s->type))
2906 assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);
2907
2908 return r;
2909}
2910
fd69f224 2911static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {
b6d5481b
LP
2912 int r;
2913
2914 assert(s);
2915
2916 if (!s->ratelimited)
2917 return 0;
2918
2919 /* Let's take the event source out of the monotonic prioq first. */
2920 event_source_time_prioq_remove(s, &s->event->monotonic);
2921
2922 /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
2923 if (EVENT_SOURCE_IS_TIME(s->type)) {
2924 r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
2925 if (r < 0)
2926 goto fail;
2927 }
2928
2929 /* Let's try to take it online again. */
2930 r = event_source_online(s, s->enabled, /* ratelimited= */ false);
2931 if (r < 0) {
2932 /* Do something roughly sensible when this failed: undo the two prioq ops above */
2933 if (EVENT_SOURCE_IS_TIME(s->type))
2934 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2935
2936 goto fail;
2937 }
2938
2939 event_source_pp_prioq_reshuffle(s);
2940 ratelimit_reset(&s->rate_limit);
2941
2942 log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));
fd69f224
MS
2943
2944 if (run_callback && s->ratelimit_expire_callback) {
2945 s->dispatching = true;
2946 r = s->ratelimit_expire_callback(s, s->userdata);
2947 s->dispatching = false;
2948
2949 if (r < 0) {
2950 log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
2951 strna(s->description),
2952 event_source_type_to_string(s->type),
2953 s->exit_on_failure ? "exiting" : "disabling");
2954
2955 if (s->exit_on_failure)
2956 (void) sd_event_exit(s->event, r);
2957 }
2958
2959 if (s->n_ref == 0)
2960 source_free(s);
2961 else if (r < 0)
0a040e64 2962 assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
fd69f224
MS
2963
2964 return 1;
2965 }
2966
b6d5481b
LP
2967 return 0;
2968
2969fail:
2970 /* Do something somewhat reasonable when we cannot move an event sources out of ratelimited mode:
2971 * simply put it back in it, maybe we can then process it more successfully next iteration. */
2972 assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);
2973
2974 return r;
2975}
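
The state machinery above is driven by the public rate-limit API. A sketch, assuming sd_event_source_set_ratelimit() with an (interval, burst) pair: the source below may dispatch at most 10 times per second; further events take it offline until the window ends, at which point the expiry path above brings it back (and runs the expiry callback, if one was set).

        #include <systemd/sd-event.h>

        static int throttle_source(sd_event_source *s) {
                /* at most 10 dispatches per 1s window (interval is in µs) */
                return sd_event_source_set_ratelimit(s, 1000000ULL, 10);
        }
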
2976
c2ba3ad6
LP
2977static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2978 usec_t c;
2979 assert(e);
2980 assert(a <= b);
2981
2982 if (a <= 0)
2983 return 0;
393003e1
LP
2984 if (a >= USEC_INFINITY)
2985 return USEC_INFINITY;
c2ba3ad6
LP
2986
2987 if (b <= a + 1)
2988 return a;
2989
52444dc4
LP
2990 initialize_perturb(e);
2991
c2ba3ad6
LP
2992 /*
2993 Find a good time to wake up again between times a and b. We
2994 have two goals here:
2995
2996 a) We want to wake up as seldom as possible, hence prefer
2997 later times over earlier times.
2998
2999 b) But if we have to wake up, then let's make sure to
3000 dispatch as much as possible on the entire system.
3001
3002 We implement this by waking up everywhere at the same time
850516e0 3003 within any given minute if we can, synchronised via the
c2ba3ad6 3004 perturbation value determined from the boot ID. If we can't,
ba276c81
LP
 3005 then we try to find the same spot in every 10s, then every 1s
 3006 and then every 250ms window. Otherwise, we pick the last possible time
3007 to wake up.
c2ba3ad6
LP
3008 */
3009
850516e0
LP
3010 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
3011 if (c >= b) {
3012 if (_unlikely_(c < USEC_PER_MINUTE))
3013 return b;
3014
3015 c -= USEC_PER_MINUTE;
3016 }
3017
ba276c81
LP
3018 if (c >= a)
3019 return c;
3020
3021 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
3022 if (c >= b) {
3023 if (_unlikely_(c < USEC_PER_SEC*10))
3024 return b;
3025
3026 c -= USEC_PER_SEC*10;
3027 }
3028
850516e0
LP
3029 if (c >= a)
3030 return c;
3031
3032 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
c2ba3ad6
LP
3033 if (c >= b) {
3034 if (_unlikely_(c < USEC_PER_SEC))
3035 return b;
3036
3037 c -= USEC_PER_SEC;
3038 }
3039
3040 if (c >= a)
3041 return c;
3042
3043 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
3044 if (c >= b) {
3045 if (_unlikely_(c < USEC_PER_MSEC*250))
3046 return b;
3047
3048 c -= USEC_PER_MSEC*250;
3049 }
3050
3051 if (c >= a)
3052 return c;
3053
3054 return b;
3055}
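
A worked example of the first (per-minute) rounding step helps; the standalone toy below uses a made-up perturbation offset, mirrors the arithmetic above, and lands inside [a, b] at the boot-specific second-of-minute.

        #include <inttypes.h>
        #include <stdio.h>

        #define TOY_USEC_PER_MINUTE 60000000ULL

        int main(void) {
                uint64_t a = 125000000ULL;      /* earliest allowed wakeup: 125s */
                uint64_t b = 185000000ULL;      /* latest allowed wakeup:   185s */
                uint64_t perturb = 17000000ULL; /* made-up boot-ID-derived offset: 17s */

                uint64_t c = (b / TOY_USEC_PER_MINUTE) * TOY_USEC_PER_MINUTE + perturb;
                if (c >= b)
                        c -= TOY_USEC_PER_MINUTE;

                /* prints 137000000, i.e. 2min17s: the same second-of-minute every time */
                if (c >= a)
                        printf("wake at %" PRIu64 " us\n", c);
                return 0;
        }
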
3056
fd38203a
LP
3057static int event_arm_timer(
3058 sd_event *e,
6a0f1f6d 3059 struct clock_data *d) {
fd38203a
LP
3060
3061 struct itimerspec its = {};
c2ba3ad6
LP
3062 sd_event_source *a, *b;
3063 usec_t t;
fd38203a 3064
cde93897 3065 assert(e);
6a0f1f6d 3066 assert(d);
fd38203a 3067
d06441da 3068 if (!d->needs_rearm)
212bbb17 3069 return 0;
7e2bf71c
YW
3070
3071 d->needs_rearm = false;
212bbb17 3072
6a0f1f6d 3073 a = prioq_peek(d->earliest);
19947509 3074 assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
b6d5481b 3075 if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {
72aedc1e 3076
6a0f1f6d 3077 if (d->fd < 0)
c57b5ca3
LP
3078 return 0;
3079
3a43da28 3080 if (d->next == USEC_INFINITY)
72aedc1e
LP
3081 return 0;
3082
3083 /* disarm */
15c689d7
LP
3084 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3085 return -errno;
72aedc1e 3086
3a43da28 3087 d->next = USEC_INFINITY;
fd38203a 3088 return 0;
72aedc1e 3089 }
fd38203a 3090
6a0f1f6d 3091 b = prioq_peek(d->latest);
19947509
ZJS
3092 assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
3093 assert(b && b->enabled != SD_EVENT_OFF);
c2ba3ad6 3094
b6d5481b 3095 t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
6a0f1f6d 3096 if (d->next == t)
fd38203a
LP
3097 return 0;
3098
6a0f1f6d 3099 assert_se(d->fd >= 0);
fd38203a 3100
c2ba3ad6 3101 if (t == 0) {
fd38203a
LP
 3102 /* We don't want to disarm here, just set some time looooong ago. */
3103 its.it_value.tv_sec = 0;
3104 its.it_value.tv_nsec = 1;
3105 } else
c2ba3ad6 3106 timespec_store(&its.it_value, t);
fd38203a 3107
15c689d7 3108 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
cde93897 3109 return -errno;
fd38203a 3110
6a0f1f6d 3111 d->next = t;
fd38203a
LP
3112 return 0;
3113}
3114
9a800b56 3115static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
fd38203a
LP
3116 assert(e);
3117 assert(s);
3118 assert(s->type == SOURCE_IO);
3119
9a800b56
LP
3120 /* If the event source was already pending, we just OR in the
3121 * new revents, otherwise we reset the value. The ORing is
3122 * necessary to handle EPOLLONESHOT events properly where
3123 * readability might happen independently of writability, and
3124 * we need to keep track of both */
3125
3126 if (s->pending)
3127 s->io.revents |= revents;
3128 else
3129 s->io.revents = revents;
fd38203a 3130
fd38203a
LP
3131 return source_set_pending(s, true);
3132}
3133
72aedc1e 3134static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
fd38203a
LP
3135 uint64_t x;
3136 ssize_t ss;
3137
3138 assert(e);
da7e457c 3139 assert(fd >= 0);
72aedc1e 3140
305f78bf 3141 assert_return(events == EPOLLIN, -EIO);
fd38203a
LP
3142
3143 ss = read(fd, &x, sizeof(x));
3144 if (ss < 0) {
945c2931 3145 if (IN_SET(errno, EAGAIN, EINTR))
fd38203a
LP
3146 return 0;
3147
3148 return -errno;
3149 }
3150
8d35dae7 3151 if (_unlikely_(ss != sizeof(x)))
fd38203a
LP
3152 return -EIO;
3153
cde93897 3154 if (next)
3a43da28 3155 *next = USEC_INFINITY;
72aedc1e 3156
fd38203a
LP
3157 return 0;
3158}
3159
305f78bf
LP
3160static int process_timer(
3161 sd_event *e,
3162 usec_t n,
6a0f1f6d 3163 struct clock_data *d) {
305f78bf 3164
fd38203a 3165 sd_event_source *s;
fd69f224 3166 bool callback_invoked = false;
fd38203a
LP
3167 int r;
3168
3169 assert(e);
6a0f1f6d 3170 assert(d);
fd38203a
LP
3171
3172 for (;;) {
6a0f1f6d 3173 s = prioq_peek(d->earliest);
19947509
ZJS
3174 assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
3175
b6d5481b
LP
3176 if (!s || time_event_source_next(s) > n)
3177 break;
3178
3179 if (s->ratelimited) {
3180 /* This is an event sources whose ratelimit window has ended. Let's turn it on
3181 * again. */
3182 assert(s->ratelimited);
3183
fd69f224 3184 r = event_source_leave_ratelimit(s, /* run_callback */ true);
b6d5481b
LP
3185 if (r < 0)
3186 return r;
fd69f224
MS
3187 else if (r == 1)
3188 callback_invoked = true;
b6d5481b
LP
3189
3190 continue;
3191 }
3192
3193 if (s->enabled == SD_EVENT_OFF || s->pending)
fd38203a
LP
3194 break;
3195
3196 r = source_set_pending(s, true);
3197 if (r < 0)
3198 return r;
3199
e1951c16 3200 event_source_time_prioq_reshuffle(s);
fd38203a
LP
3201 }
3202
fd69f224 3203 return callback_invoked;
fd38203a
LP
3204}
3205
efd3be9d
YW
3206static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
3207 int64_t min_priority = threshold;
3208 bool something_new = false;
fd38203a 3209 sd_event_source *s;
fd38203a
LP
3210 int r;
3211
3212 assert(e);
efd3be9d
YW
3213 assert(ret_min_priority);
3214
3215 if (!e->need_process_child) {
3216 *ret_min_priority = min_priority;
3217 return 0;
3218 }
fd38203a 3219
c2ba3ad6
LP
3220 e->need_process_child = false;
3221
fd38203a
LP
3222 /*
3223 So, this is ugly. We iteratively invoke waitid() with P_PID
3224 + WNOHANG for each PID we wait for, instead of using
3225 P_ALL. This is because we only want to get child
3226 information of very specific child processes, and not all
 3227 of them. We might not have processed the SIGCHLD event of a
 3228 previous invocation and we don't want to maintain an
 3229 unbounded *per-child* event queue, hence we really don't
3230 want anything flushed out of the kernel's queue that we
3231 don't care about. Since this is O(n) this means that if you
3232 have a lot of processes you probably want to handle SIGCHLD
3233 yourself.
08cd1552
LP
3234
 3235 We do not reap the children here (by using WNOWAIT); this
3236 is only done after the event source is dispatched so that
3237 the callback still sees the process as a zombie.
fd38203a
LP
3238 */
3239
90e74a66 3240 HASHMAP_FOREACH(s, e->child_sources) {
fd38203a
LP
3241 assert(s->type == SOURCE_CHILD);
3242
efd3be9d
YW
3243 if (s->priority > threshold)
3244 continue;
3245
fd38203a
LP
3246 if (s->pending)
3247 continue;
3248
b6d5481b 3249 if (event_source_is_offline(s))
fd38203a
LP
3250 continue;
3251
f8f3f926
LP
3252 if (s->child.exited)
3253 continue;
3254
3255 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
3256 continue;
3257
fd38203a 3258 zero(s->child.siginfo);
15c689d7
LP
3259 if (waitid(P_PID, s->child.pid, &s->child.siginfo,
3260 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
bfd9bfcc 3261 return negative_errno();
fd38203a
LP
3262
3263 if (s->child.siginfo.si_pid != 0) {
945c2931 3264 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 3265
f8f3f926
LP
3266 if (zombie)
3267 s->child.exited = true;
3268
08cd1552
LP
3269 if (!zombie && (s->child.options & WEXITED)) {
3270 /* If the child isn't dead then let's
3271 * immediately remove the state change
3272 * from the queue, since there's no
3273 * benefit in leaving it queued */
3274
3275 assert(s->child.options & (WSTOPPED|WCONTINUED));
a5d27871 3276 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
08cd1552
LP
3277 }
3278
fd38203a
LP
3279 r = source_set_pending(s, true);
3280 if (r < 0)
3281 return r;
efd3be9d
YW
3282 if (r > 0) {
3283 something_new = true;
3284 min_priority = MIN(min_priority, s->priority);
3285 }
fd38203a
LP
3286 }
3287 }
3288
efd3be9d
YW
3289 *ret_min_priority = min_priority;
3290 return something_new;
fd38203a
LP
3291}
3292
f8f3f926
LP
3293static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
3294 assert(e);
3295 assert(s);
3296 assert(s->type == SOURCE_CHILD);
3297
3298 if (s->pending)
3299 return 0;
3300
b6d5481b 3301 if (event_source_is_offline(s))
f8f3f926
LP
3302 return 0;
3303
3304 if (!EVENT_SOURCE_WATCH_PIDFD(s))
3305 return 0;
3306
3307 zero(s->child.siginfo);
3308 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
3309 return -errno;
3310
3311 if (s->child.siginfo.si_pid == 0)
3312 return 0;
3313
3314 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
3315 s->child.exited = true;
3316
3317 return source_set_pending(s, true);
3318}
3319
efd3be9d 3320static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
fd38203a
LP
3321 int r;
3322
da7e457c 3323 assert(e);
97ef5391 3324 assert(d);
305f78bf 3325 assert_return(events == EPOLLIN, -EIO);
efd3be9d 3326 assert(min_priority);
fd38203a 3327
9da4cb2b
LP
3328 /* If there's a signal queued on this priority and SIGCHLD is
3329 on this priority too, then make sure to recheck the
3330 children we watch. This is because we only ever dequeue
 3331 the first signal per priority, and if we dequeue one while
 3332 SIGCHLD is enqueued behind it we wouldn't know; but we
 3333 might have higher-priority children we care about, hence we
 3334 need to check that explicitly. */
3335
3336 if (sigismember(&d->sigset, SIGCHLD))
3337 e->need_process_child = true;
3338
3339 /* If there's already an event source pending for this
3340 * priority we don't read another */
3341 if (d->current)
3342 return 0;
3343
fd38203a 3344 for (;;) {
0eb2e0e3 3345 struct signalfd_siginfo si;
7057bd99 3346 ssize_t n;
92daebc0 3347 sd_event_source *s = NULL;
fd38203a 3348
9da4cb2b 3349 n = read(d->fd, &si, sizeof(si));
7057bd99 3350 if (n < 0) {
945c2931 3351 if (IN_SET(errno, EAGAIN, EINTR))
efd3be9d 3352 return 0;
fd38203a
LP
3353
3354 return -errno;
3355 }
3356
7057bd99 3357 if (_unlikely_(n != sizeof(si)))
fd38203a
LP
3358 return -EIO;
3359
6eb7c172 3360 assert(SIGNAL_VALID(si.ssi_signo));
7057bd99 3361
92daebc0
LP
3362 if (e->signal_sources)
3363 s = e->signal_sources[si.ssi_signo];
92daebc0
LP
3364 if (!s)
3365 continue;
9da4cb2b
LP
3366 if (s->pending)
3367 continue;
fd38203a
LP
3368
3369 s->signal.siginfo = si;
9da4cb2b
LP
3370 d->current = s;
3371
fd38203a
LP
3372 r = source_set_pending(s, true);
3373 if (r < 0)
3374 return r;
efd3be9d
YW
3375 if (r > 0 && *min_priority >= s->priority) {
3376 *min_priority = s->priority;
3377 return 1; /* an event source with smaller priority is queued. */
3378 }
9da4cb2b 3379
efd3be9d 3380 return 0;
fd38203a 3381 }
fd38203a
LP
3382}
3383
efd3be9d 3384static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
97ef5391
LP
3385 ssize_t n;
3386
3387 assert(e);
3388 assert(d);
3389
3390 assert_return(revents == EPOLLIN, -EIO);
3391
3392 /* If there's already an event source pending for this priority, don't read another */
3393 if (d->n_pending > 0)
3394 return 0;
3395
3396 /* Is the read buffer non-empty? If so, let's not read more */
3397 if (d->buffer_filled > 0)
3398 return 0;
3399
efd3be9d
YW
3400 if (d->priority > threshold)
3401 return 0;
3402
97ef5391
LP
3403 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3404 if (n < 0) {
3405 if (IN_SET(errno, EAGAIN, EINTR))
3406 return 0;
3407
3408 return -errno;
3409 }
3410
3411 assert(n > 0);
3412 d->buffer_filled = (size_t) n;
3413 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3414
3415 return 1;
3416}
3417
3418static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3419 assert(e);
3420 assert(d);
3421 assert(sz <= d->buffer_filled);
3422
3423 if (sz == 0)
3424 return;
3425
 3426 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3427 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3428 d->buffer_filled -= sz;
3429
3430 if (d->buffer_filled == 0)
3431 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3432}
3433
3434static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3435 int r;
3436
3437 assert(e);
3438 assert(d);
3439
3440 /* If there's already an event source pending for this priority, don't read another */
3441 if (d->n_pending > 0)
3442 return 0;
3443
3444 while (d->buffer_filled > 0) {
3445 size_t sz;
3446
3447 /* Let's validate that the event structures are complete */
3448 if (d->buffer_filled < offsetof(struct inotify_event, name))
3449 return -EIO;
3450
3451 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3452 if (d->buffer_filled < sz)
3453 return -EIO;
3454
3455 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3456 struct inode_data *inode_data;
97ef5391
LP
3457
3458 /* The queue overran, let's pass this event to all event sources connected to this inotify
3459 * object */
3460
90e74a66 3461 HASHMAP_FOREACH(inode_data, d->inodes) {
97ef5391
LP
3462 sd_event_source *s;
3463
3464 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3465
b6d5481b 3466 if (event_source_is_offline(s))
97ef5391
LP
3467 continue;
3468
3469 r = source_set_pending(s, true);
3470 if (r < 0)
3471 return r;
3472 }
3473 }
3474 } else {
3475 struct inode_data *inode_data;
3476 sd_event_source *s;
3477
3478 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3479 * our watch descriptor table. */
3480 if (d->buffer.ev.mask & IN_IGNORED) {
3481
3482 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3483 if (!inode_data) {
3484 event_inotify_data_drop(e, d, sz);
3485 continue;
3486 }
3487
3488 /* The watch descriptor was removed by the kernel, let's drop it here too */
3489 inode_data->wd = -1;
3490 } else {
3491 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3492 if (!inode_data) {
3493 event_inotify_data_drop(e, d, sz);
3494 continue;
3495 }
3496 }
3497
3498 /* Trigger all event sources that are interested in these events. Also trigger all event
3499 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3500 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3501
b6d5481b 3502 if (event_source_is_offline(s))
97ef5391
LP
3503 continue;
3504
3505 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3506 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3507 continue;
3508
3509 r = source_set_pending(s, true);
3510 if (r < 0)
3511 return r;
3512 }
3513 }
3514
3515 /* Something pending now? If so, let's finish, otherwise let's read more. */
3516 if (d->n_pending > 0)
3517 return 1;
3518 }
3519
3520 return 0;
3521}
3522
3523static int process_inotify(sd_event *e) {
3524 struct inotify_data *d;
3525 int r, done = 0;
3526
3527 assert(e);
3528
3529 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3530 r = event_inotify_data_process(e, d);
3531 if (r < 0)
3532 return r;
3533 if (r > 0)
 3534 done++;
3535 }
3536
3537 return done;
3538}
3539
fd38203a 3540static int source_dispatch(sd_event_source *s) {
b778cba4 3541 _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
8f5c235d 3542 EventSourceType saved_type;
fe8245eb 3543 int r = 0;
fd38203a
LP
3544
3545 assert(s);
6203e07a 3546 assert(s->pending || s->type == SOURCE_EXIT);
fd38203a 3547
b778cba4
LP
3548 /* Save the event source type, here, so that we still know it after the event callback which might
3549 * invalidate the event. */
8f5c235d
LP
3550 saved_type = s->type;
3551
de02634c 3552 /* Similarly, store a reference to the event loop object, so that we can still access it after the
b778cba4
LP
3553 * callback might have invalidated/disconnected the event source. */
3554 saved_event = sd_event_ref(s->event);
3555
de02634c 3556 /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
b6d5481b
LP
3557 assert(!s->ratelimited);
3558 if (!ratelimit_below(&s->rate_limit)) {
3559 r = event_source_enter_ratelimited(s);
3560 if (r < 0)
3561 return r;
3562
3563 return 1;
3564 }
3565
945c2931 3566 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
da7e457c
LP
3567 r = source_set_pending(s, false);
3568 if (r < 0)
3569 return r;
3570 }
fd38203a 3571
6e9feda3
LP
3572 if (s->type != SOURCE_POST) {
3573 sd_event_source *z;
6e9feda3 3574
de02634c 3575 /* If we execute a non-post source, let's mark all post sources as pending. */
6e9feda3 3576
90e74a66 3577 SET_FOREACH(z, s->event->post_sources) {
b6d5481b 3578 if (event_source_is_offline(z))
6e9feda3
LP
3579 continue;
3580
3581 r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                /* If the inotify callback destroys the event source then this likely means we don't need to
                 * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
                 * free it immediately, then we couldn't drop the event from the inotify event queue without
                 * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
                 * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
                 * explicitly GC it after we are done dropping the inotify event from the buffer. */
                d->n_busy++;
                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
                d->n_busy--;

                /* When no event is pending anymore on this inotify object, then let's drop the event from
                 * the inotify event queue buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                /* Now we don't want to access 'd' anymore, it's OK to GC now. */
                event_gc_inotify_data(e, d);
                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached();
        }

        s->dispatching = false;

        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        assert(!p || p->type == SOURCE_EXIT);

        if (!p || event_source_is_offline(p)) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}
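
/* Illustrative usage sketch (not part of the original file): exit sources are what
 * dispatch_exit() above runs, one at a time in priority order, once sd_event_exit()
 * has been requested. A hypothetical cleanup hook could look like this: */
#if 0
#include <stdlib.h>
#include <systemd/sd-event.h>

static int cleanup_handler(sd_event_source *s, void *userdata) {
        /* Invoked while the loop is in SD_EVENT_EXITING. */
        free(userdata);
        return 0;
}

static int register_cleanup(sd_event *e, void *resource) {
        /* Passing NULL for the source pointer makes the source floating, owned by the loop. */
        return sd_event_add_exit(e, NULL, cleanup_handler, resource);
}
#endif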

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (event_source_is_offline(p))
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          usec_add(e->watchdog_last, (e->watchdog_period / 2)),
                          usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4)));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL));
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
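
/* Worked example (illustrative): with a watchdog period of 20s and the last ping sent
 * at t=0, arm_watchdog() schedules the next timerfd wakeup somewhere in [10s, 15s],
 * i.e. [period/2, period*3/4] after the last ping, while process_watchdog() suppresses
 * pings arriving less than period/4 = 5s after the previous one. */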

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that if we are a default event loop we are executed in the correct thread. We only
         * do this check here once, since gettid() is typically not cached, and we thus want to minimize
         * syscalls. */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
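
/* Note (summary, not in the original file): sd_event_prepare() is the first step of
 * the split-phase API. On success it returns 0 with the loop in SD_EVENT_ARMED (the
 * caller should go on to sd_event_wait()), or > 0 with the loop in SD_EVENT_PENDING
 * (events are already waiting and sd_event_dispatch() may be called right away). */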

static int epoll_wait_usec(
                int fd,
                struct epoll_event *events,
                int maxevents,
                usec_t timeout) {

        int msec;
#if 0
        static bool epoll_pwait2_absent = false;
        int r; /* Needed should this block ever be re-enabled. */

        /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not.
         *
         * FIXME: this is temporarily disabled until epoll_pwait2() becomes more widely available.
         * See https://github.com/systemd/systemd/pull/18973 and
         * https://github.com/systemd/systemd/issues/19052. */

        if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
                struct timespec ts;

                r = epoll_pwait2(fd,
                                 events,
                                 maxevents,
                                 timespec_store(&ts, timeout),
                                 NULL);
                if (r >= 0)
                        return r;
                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                        return -errno; /* Only fall back to the old epoll_wait() if the syscall is masked
                                        * or not supported. */

                epoll_pwait2_absent = true;
        }
#endif

        if (timeout == USEC_INFINITY)
                msec = -1;
        else {
                usec_t k;

                k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
                if (k >= INT_MAX)
                        msec = INT_MAX; /* Saturate */
                else
                        msec = (int) k;
        }

        return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
}
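
/* Worked example (illustrative): epoll_wait() only has millisecond resolution, so the
 * microsecond timeout is rounded up, never down: DIV_ROUND_UP(1, USEC_PER_MSEC) == 1,
 * i.e. a 1 µs timeout becomes a 1 ms sleep rather than a 0 ms non-blocking poll, and
 * anything at or above INT_MAX ms saturates to INT_MAX. */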

static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
        size_t n_event_queue, m, n_event_max;
        int64_t min_priority = threshold;
        bool something_new = false;
        int r;

        assert(e);
        assert(ret_min_priority);

        n_event_queue = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
                return -ENOMEM;

        n_event_max = MALLOC_ELEMENTSOF(e->event_queue);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        for (;;) {
                r = epoll_wait_usec(
                                e->epoll_fd,
                                e->event_queue,
                                n_event_max,
                                timeout);
                if (r < 0)
                        return r;

                m = (size_t) r;

                if (m < n_event_max)
                        break;

                if (n_event_max >= n_event_queue * 10)
                        break;

                if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
                        return -ENOMEM;

                n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
                timeout = 0;
        }

        /* Set the timestamp only when this is called for the first time. */
        if (threshold == INT64_MAX)
                triple_timestamp_get(&e->timestamp);

        for (size_t i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
                else {
                        WakeupType *t = e->event_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                assert(s);

                                if (s->priority > threshold)
                                        continue;

                                min_priority = MIN(min_priority, s->priority);

                                switch (s->type) {

                                case SOURCE_IO:
                                        r = process_io(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_CHILD:
                                        r = process_pidfd(e, s, e->event_queue[i].events);
                                        break;

                                default:
                                        assert_not_reached();
                                }

                                break;
                        }

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                assert(d);

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
                                break;

                        default:
                                assert_not_reached();
                        }
                }
                if (r < 0)
                        return r;
                if (r > 0)
                        something_new = true;
        }

        *ret_min_priority = min_priority;
        return something_new;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        for (int64_t threshold = INT64_MAX; ; threshold--) {
                int64_t epoll_min_priority, child_min_priority;

                /* New epoll (especially IO) and child events may be triggered just after the
                 * process_epoll() call but before process_child(), and the new IO events may have a
                 * higher priority than the child events. To salvage these events, let's call
                 * epoll_wait() again, but accept only events with a higher priority than the
                 * previous ones. See issue https://github.com/systemd/systemd/issues/18190 and comments
                 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
                 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */

                r = process_epoll(e, timeout, threshold, &epoll_min_priority);
                if (r == -EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }
                if (r < 0)
                        goto finish;
                if (r == 0 && threshold < INT64_MAX)
                        /* No new epoll event. */
                        break;

                r = process_child(e, threshold, &child_min_priority);
                if (r < 0)
                        goto finish;
                if (r == 0)
                        /* No new child event. */
                        break;

                threshold = MIN(epoll_min_priority, child_min_priority);
                if (threshold == INT64_MIN)
                        break;

                timeout = 0;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;
        else if (r == 1) {
                /* A ratelimit expiry callback was called. Let's postpone processing pending sources and
                 * put the loop back into the initial state, so that the next iteration also evaluates
                 * sources that were potentially re-enabled by the callback.
                 *
                 * Wondering why we treat only this invocation of process_timer() differently? Once an
                 * event source is ratelimited we essentially transform it into a CLOCK_MONOTONIC timer,
                 * hence the ratelimit expiry callback is never called for any other timer type. */
                r = 0;
                goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
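
/* Illustrative sketch (not part of the original file): the prepare/wait/dispatch split
 * above exists so that sd-event can be embedded into a foreign main loop that polls the
 * fd returned by sd_event_get_fd(). poll_fd_externally() is a hypothetical stand-in for
 * whatever the outer loop provides: */
#if 0
#include <systemd/sd-event.h>

static int drive_one_iteration(sd_event *e) {
        int r;

        r = sd_event_prepare(e);
        if (r < 0)
                return r;
        if (r == 0) {
                /* Nothing pending yet: let the foreign loop poll the epoll fd for
                 * POLLIN, then collect events without blocking. */
                poll_fd_externally(sd_event_get_fd(e)); /* hypothetical helper */

                r = sd_event_wait(e, 0);
                if (r <= 0)
                        return r;
        }

        return sd_event_dispatch(e);
}
#endif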

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run_usec != 0) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run_usec);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log_usec = this_run;
                }
        }

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run_usec = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now; let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
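
/* Illustrative sketch (not part of the original file): sd_event_run() executes at most
 * one iteration, so a caller can interleave its own work between iterations;
 * do_housekeeping() is a hypothetical caller-provided helper: */
#if 0
#include <systemd/sd-event.h>

static int run_with_housekeeping(sd_event *e) {
        int r;

        while (sd_event_get_state(e) != SD_EVENT_FINISHED) {
                r = sd_event_run(e, 5 * 1000000ULL); /* wait up to 5s (in µs) per iteration */
                if (r < 0)
                        return r;

                do_housekeeping(); /* hypothetical helper */
        }

        return 0;
}
#endif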

_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, UINT64_MAX);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
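
/* Illustrative sketch (not part of the original file): the simplest complete consumer
 * of the API above, a default event loop with a one-shot timer that ends the loop;
 * error checking elided for brevity: */
#if 0
#include <time.h>
#include <systemd/sd-event.h>

static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int main(void) {
        sd_event *e = NULL;
        uint64_t now_usec;

        sd_event_default(&e);
        sd_event_now(e, CLOCK_MONOTONIC, &now_usec);

        /* One-shot timer, 1s (in µs) from now on the monotonic clock. */
        sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + 1000000, 0, on_time, NULL);

        int r = sd_event_loop(e); /* returns the code passed to sd_event_exit() */
        sd_event_unref(e);
        return r < 0;
}
#endif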

_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
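
/* Illustrative sketch (not part of the original file): the usual way to request a clean
 * shutdown is to call sd_event_exit() from a handler, e.g. a SIGTERM source; note that
 * the signal must be blocked before sd_event_add_signal() can take it over: */
#if 0
#include <signal.h>
#include <sys/signalfd.h>
#include <systemd/sd-event.h>

static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        /* Makes sd_event_loop() return 0 once all exit sources have run. */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int setup_sigterm(sd_event *e) {
        sigset_t mask;

        sigemptyset(&mask);
        sigaddset(&mask, SIGTERM);
        sigprocmask(SIG_BLOCK, &mask, NULL);

        return sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
}
#endif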

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
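
/* Illustrative sketch (not part of the original file): sd_event_now() is the natural
 * base for relative timers, since within one iteration it returns the cached wakeup
 * timestamp rather than issuing a fresh clock_gettime(): */
#if 0
#include <time.h>
#include <systemd/sd-event.h>

static int arm_relative_timer(sd_event *e, uint64_t delay_usec,
                              sd_event_time_handler_t handler, void *userdata) {
        uint64_t usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &usec); /* cached timestamp, or now() before the first iteration */
        if (r < 0)
                return r;

        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC, usec + delay_usec, 0, handler, userdata);
}
#endif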

_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
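
/* Illustrative sketch (not part of the original file): a service started with
 * WatchdogSec= only needs to opt in once; the pinging itself is handled by
 * arm_watchdog()/process_watchdog() above: */
#if 0
#include <systemd/sd-event.h>

static int enable_watchdog(sd_event *e) {
        /* Returns > 0 if the service manager set $WATCHDOG_USEC for this service,
         * 0 if no watchdog was requested, < 0 on error. */
        return sd_event_set_watchdog(e, 1);
}
#endif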

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}

_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}
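
/* Illustrative sketch (not part of the original file): a floating source is kept alive
 * by the event loop instead of by a caller-held reference, which is handy for
 * fire-and-forget sources; error checking partly elided: */
#if 0
#include <systemd/sd-event.h>

static int add_oneshot_defer(sd_event *e, sd_event_handler_t handler, void *userdata) {
        sd_event_source *s = NULL;
        int r;

        r = sd_event_add_defer(e, &s, handler, userdata);
        if (r < 0)
                return r;

        (void) sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);

        /* Flip ownership to the loop, then drop our reference. */
        r = sd_event_source_set_floating(s, 1);
        sd_event_source_unref(s);
        return r;
}
#endif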

_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}

_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
        int r;

        assert_return(s, -EINVAL);

        /* Turning on ratelimiting for event source types that don't support it is a loggable offense.
         * Doing so is a programming error. */
        assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);

        /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
         * non-ratelimited. */
        r = event_source_leave_ratelimit(s, /* run_callback */ false);
        if (r < 0)
                return r;

        s->rate_limit = (RateLimit) { interval, burst };
        return 0;
}

_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
        assert_return(s, -EINVAL);

        s->ratelimit_expire_callback = callback;
        return 0;
}
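
/* Illustrative sketch (not part of the original file): rate limiting an existing IO
 * source so that an event flood cannot starve the rest of the loop; here at most 10
 * dispatches per 1s interval (in µs), after which the source is taken offline until
 * the interval elapses: */
#if 0
#include <systemd/sd-event.h>

static int throttle_source(sd_event_source *s) {
        return sd_event_source_set_ratelimit(s, 1000000, 10);
}
#endif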

_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
        assert_return(s, -EINVAL);

        /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
         * don't use assert_return(). Unlike turning on ratelimiting, it's not really a programming error. */
        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return -EDOM;

        if (!ratelimit_configured(&s->rate_limit))
                return -ENOEXEC;

        if (ret_interval)
                *ret_interval = s->rate_limit.interval;
        if (ret_burst)
                *ret_burst = s->rate_limit.burst;

        return 0;
}

_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
        assert_return(s, -EINVAL);

        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return false;

        if (!ratelimit_configured(&s->rate_limit))
                return false;

        return s->ratelimited;
}