/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN on a pidfd */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static bool event_source_is_online(sd_event_source *s) {
        assert(s);
        return s->enabled != SD_EVENT_OFF && !s->ratelimited;
}

static bool event_source_is_offline(sd_event_source *s) {
        assert(s);
        return s->enabled == SD_EVENT_OFF || s->ratelimited;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t)                 \
        IN_SET((t),                             \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM)

#define EVENT_SOURCE_CAN_RATE_LIMIT(t)          \
        IN_SET((t),                             \
               SOURCE_IO,                       \
               SOURCE_TIME_REALTIME,            \
               SOURCE_TIME_BOOTTIME,            \
               SOURCE_TIME_MONOTONIC,           \
               SOURCE_TIME_REALTIME_ALARM,      \
               SOURCE_TIME_BOOTTIME_ALARM,      \
               SOURCE_SIGNAL,                   \
               SOURCE_DEFER,                    \
               SOURCE_INOTIFY)

/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
 * Time sources and ratelimited sources can be passed, so effectively this is the same as the
 * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_online_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run_usec, last_log_usec;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}
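
/* Example (editorial note): among pending sources, an enabled, non-ratelimited source with
 * priority -10 dispatches before one with priority 0; between otherwise equal sources, the one
 * marked pending in an earlier event-loop iteration goes first, which keeps newly pending
 * sources from starving older ones. */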

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Non rate-limited ones first. */
        r = CMP(!!x->ratelimited, !!y->ratelimited);
        if (r != 0)
                return r;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static usec_t time_event_source_next(const sd_event_source *s) {
        assert(s);

        /* We have two kinds of event sources that have expiry times associated with them: the actual
         * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
         * once the ratelimit time window ends). Let's return the next expiry time depending on what we are
         * looking at here. */

        if (s->ratelimited) { /* If rate-limited, the next expiry is when the ratelimit time window ends */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Otherwise this must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return s->time.next;

        return USEC_INFINITY;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        assert(s);

        if (s->ratelimited) { /* For ratelimited sources the earliest and the latest time shall actually be
                               * the same, as we should avoid adding additional inaccuracy on an inaccuracy
                               * time window */
                assert(s->rate_limit.begin != 0);
                assert(s->rate_limit.interval != 0);
                return usec_add(s->rate_limit.begin, s->rate_limit.interval);
        }

        /* Must be a time event source, if not ratelimited */
        if (EVENT_SOURCE_IS_TIME(s->type))
                return usec_add(s->time.next, s->time.accuracy);

        return USEC_INFINITY;
}

static bool event_source_timer_candidate(const sd_event_source *s) {
        assert(s);

        /* Returns true for event sources that either are not pending yet (i.e. where it's worth to mark them pending)
         * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */
        return !s->pending || s->ratelimited;
}

static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
        const sd_event_source *x = a, *y = b;
        int r;

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
        r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
        if (r != 0)
                return r;

        /* Order by time */
        return CMP(time_func(x), time_func(y));
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_next);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        return time_prioq_compare(a, b, time_event_source_latest);
}
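
/* Example (editorial note): a timer with next=100ms and accuracy=50ms may fire anywhere in
 * [100ms, 150ms]. The "earliest" prioq (keyed by time_event_source_next()) yields the first
 * moment at which something must be dispatched, while the "latest" prioq (keyed by
 * time_event_source_latest()) bounds how long the wakeup may be deferred, so several timers
 * can be coalesced into a single wakeup within their tolerance windows. */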

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 … 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
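
/* Usage sketch (editorial illustration, not part of this file's build): the minimal lifecycle
 * of an event loop object. sd_event_default() could be used instead of sd_event_new() to share
 * the per-thread default instance. */
#if 0
static int example_event_loop(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* ... attach sources with sd_event_add_io(), sd_event_add_time(), ... */

        r = sd_event_loop(e); /* dispatches events until sd_event_exit() is called */
        sd_event_unref(e);
        return r;
}
#endif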

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data object. If the signal mask of the
         * object becomes empty that way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is now empty we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        if (event_pid_changed(e))
                return;

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_online_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            event_source_is_online(e->signal_sources[sig]))
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered
         * properly again. */

        if (s->ratelimited)
                d = &s->event->monotonic;
        else if (EVENT_SOURCE_IS_TIME(s->type))
                assert_se(d = event_get_clock_data(s->event, s->type));
        else
                return; /* no-op for an event source which is neither a timer nor ratelimited. */

        prioq_reshuffle(d->earliest, s, &s->earliest_index);
        prioq_reshuffle(d->latest, s, &s->latest_index);
        d->needs_rearm = true;
}

static void event_source_time_prioq_remove(
                sd_event_source *s,
                struct clock_data *d) {

        assert(s);
        assert(d);

        prioq_remove(d->earliest, s, &s->earliest_index);
        prioq_remove(d->latest, s, &s->latest_index);
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                /* Only remove this event source from the time event source here if it is not ratelimited. If
                 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
                 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */

                if (!s->ratelimited) {
                        struct clock_data *d;
                        assert_se(d = event_get_clock_data(s->event, s->type));
                        event_source_time_prioq_remove(s, d);
                }

                break;

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (event_pid_changed(s->event))
                        s->child.process_owned = false;

                if (s->child.pid > 0) {
                        if (event_source_is_online(s)) {
                                assert(s->event->n_online_child_sources > 0);
                                s->event->n_online_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the
                         * inode continues to be watched. That's because inotify doesn't really have an API
                         * for that: we can only change watch masks with access to the original inode either
                         * by fd or by path. But paths aren't stable, and keeping an O_PATH fd open all the
                         * time would mean wasting an fd continuously and keeping the mount busy which we
                         * can't really do. We could reconstruct the original inode from
                         * /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed there), but
                         * given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way,
                         * leave the mask set, even if it is not minimized now, and ignore all events we
                         * aren't interested in anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached();
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        if (s->ratelimited)
                event_source_time_prioq_remove(s, &s->event->monotonic);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static sd_event_source* source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        return mfree(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 1;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
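
/* Usage sketch (editorial illustration, not part of this file's build): watching a descriptor
 * for readability; on_readable() and conn_fd are hypothetical caller-side names. Passing
 * ret=NULL instead of &s would make the source "floating", owned by the event loop itself. */
#if 0
static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        char buf[4096];
        ssize_t n;

        n = read(fd, buf, sizeof(buf));
        if (n < 0)
                return -errno;
        if (n == 0) /* EOF: in this toy example, stop the loop */
                return sd_event_exit(sd_event_source_get_event(s), 0);

        return 0; /* keep watching */
}

static int example_watch(sd_event *e, int conn_fd) {
        sd_event_source *s = NULL;
        return sd_event_add_io(e, &s, conn_fd, EPOLLIN, on_readable, NULL);
}
#endif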

static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
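
/* Worked example (editorial note): a machine whose boot-ID halves XOR to 123456789 gets
 * e->perturb = 123456789 % USEC_PER_MINUTE = 3456789 µs, i.e. a fixed offset of roughly 3.5s
 * that all coalesced wakeups on that machine share, so a fleet of machines with synchronized
 * clocks does not wake up at exactly the same instant. */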

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        assert(d);

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}

static int event_source_time_prioq_put(
                sd_event_source *s,
                struct clock_data *d) {

        int r;

        assert(s);
        assert(d);
        assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));

        r = prioq_put(d->earliest, s, &s->earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->latest_index);
        if (r < 0) {
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
                s->earliest_index = PRIOQ_IDX_NULL;
                return r;
        }

        d->needs_rearm = true;
        return 0;
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != UINT64_MAX, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        assert_se(d = event_get_clock_data(e, type));

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = event_source_time_prioq_put(s, d);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}
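
/* Usage sketch (editorial illustration, not part of this file's build): a one-shot timer
 * firing roughly five seconds from now, with 100ms of coalescing slack. */
#if 0
static int on_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int example_timer(sd_event *e) {
        sd_event_source *s = NULL;
        return sd_event_add_time_relative(e, &s, CLOCK_MONOTONIC,
                                          5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
                                          on_timeout, NULL);
}
#endif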

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
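
/* Usage sketch (editorial illustration, not part of this file's build): the signal must be
 * blocked before it can be added, since the implementation is based on signalfd; see the
 * signal_is_blocked() check above. A NULL callback falls back to signal_exit_callback(), i.e.
 * the loop exits with PTR_TO_INT(userdata) as exit code. */
#if 0
static int example_exit_on_sigterm(sd_event *e) {
        sigset_t ss;

        assert_se(sigemptyset(&ss) >= 0);
        assert_se(sigaddset(&ss, SIGTERM) >= 0);
        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
                return -errno;

        return sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
}
#endif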

static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for something other than WEXITED, so that
         * we pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;

        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        /* These must be done after everything succeeds. */
        s->child.pid = pid;
        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}
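
/* Usage sketch (editorial illustration, not part of this file's build): as required by the
 * comment above, SIGCHLD must be blocked (in every thread) before child sources are added,
 * even on pidfd-capable kernels. */
#if 0
static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        log_debug("Child " PID_FMT " exited with status %i.", si->si_pid, si->si_status);
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int example_watch_child(sd_event *e, pid_t pid) {
        sigset_t ss;

        assert_se(sigemptyset(&ss) >= 0);
        assert_se(sigaddset(&ss, SIGCHLD) >= 0);
        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
                return -errno;

        return sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
}
#endif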

_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_online_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_online_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (!event_pid_changed(e) &&
                    epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); /* IN_CLOEXEC is the correct flag for inotify_init1(), not O_CLOEXEC */
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(LIST_IS_EMPTY(d->event_sources));

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0 && !event_pid_changed(e)) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inotify_data(
                sd_event *e,
                struct inotify_data *d) {

        assert(e);

        /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch
         * any inode with it anymore, which in turn happens if no event source of this priority is interested
         * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC
         * (under the expectation that the GC is called again once the counter is decremented). */

        if (!d)
                return;

        if (!hashmap_isempty(d->inodes))
                return;

        if (d->n_busy > 0)
                return;

        event_free_inotify_data(e, d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (!LIST_IS_EMPTY(d->event_sources))
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        event_gc_inotify_data(e, inotify_data);
}
1874static int event_make_inode_data(
1875 sd_event *e,
1876 struct inotify_data *inotify_data,
1877 dev_t dev,
1878 ino_t ino,
1879 struct inode_data **ret) {
1880
1881 struct inode_data *d, key;
1882 int r;
1883
1884 assert(e);
1885 assert(inotify_data);
1886
1887 key = (struct inode_data) {
1888 .ino = ino,
1889 .dev = dev,
1890 };
1891
1892 d = hashmap_get(inotify_data->inodes, &key);
1893 if (d) {
1894 if (ret)
1895 *ret = d;
1896
1897 return 0;
1898 }
1899
1900 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1901 if (r < 0)
1902 return r;
1903
1904 d = new(struct inode_data, 1);
1905 if (!d)
1906 return -ENOMEM;
1907
1908 *d = (struct inode_data) {
1909 .dev = dev,
1910 .ino = ino,
1911 .wd = -1,
1912 .fd = -1,
1913 .inotify_data = inotify_data,
1914 };
1915
1916 r = hashmap_put(inotify_data->inodes, d, d);
1917 if (r < 0) {
1918 free(d);
1919 return r;
1920 }
1921
1922 if (ret)
1923 *ret = d;
1924
1925 return 1;
1926}
1927
1928static uint32_t inode_data_determine_mask(struct inode_data *d) {
1929 bool excl_unlink = true;
1930 uint32_t combined = 0;
97ef5391
LP
1931
1932 assert(d);
1933
1934 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1935 * the IN_EXCL_UNLINK flag is ANDed instead.
1936 *
1937 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1938 * because we cannot change a source's mask anymore after the event source has been created, since the kernel has no
1939 * API for that. Hence we need to subscribe to the maximum mask we might ever be interested in, and suppress
1940 * events we don't care about client-side. */
1941
1942 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1943
1944 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1945 excl_unlink = false;
1946
1947 combined |= s->inotify.mask;
1948 }
1949
1950 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1951}
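/* Worked example (hypothetical masks, not from this file): if source A watches
 * IN_CREATE|IN_EXCL_UNLINK and source B on the same inode watches IN_DELETE,
 * ORing yields IN_CREATE|IN_DELETE|IN_EXCL_UNLINK, but since B did not set
 * IN_EXCL_UNLINK the AND rule drops it, and the kernel watch is realized as
 * IN_CREATE|IN_DELETE. */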
1952
1953static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1954 uint32_t combined_mask;
1955 int wd, r;
1956
1957 assert(d);
1958 assert(d->fd >= 0);
1959
1960 combined_mask = inode_data_determine_mask(d);
1961
1962 if (d->wd >= 0 && combined_mask == d->combined_mask)
1963 return 0;
1964
1965 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1966 if (r < 0)
1967 return r;
1968
1969 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1970 if (wd < 0)
1971 return -errno;
1972
1973 if (d->wd < 0) {
1974 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1975 if (r < 0) {
1976 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1977 return r;
1978 }
1979
1980 d->wd = wd;
1981
1982 } else if (d->wd != wd) {
1983
1984 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1985 (void) inotify_rm_watch(d->inotify_data->fd, wd); /* pass the inotify fd, not the inode's O_PATH fd */
1986 return -EINVAL;
1987 }
1988
1989 d->combined_mask = combined_mask;
1990 return 1;
1991}
1992
b9350e70
LP
1993static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
1994 assert(s);
1995
1996 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1997}
1998
e67d738a 1999static int event_add_inotify_fd_internal(
97ef5391
LP
2000 sd_event *e,
2001 sd_event_source **ret,
e67d738a
LP
2002 int fd,
2003 bool donate,
97ef5391
LP
2004 uint32_t mask,
2005 sd_event_inotify_handler_t callback,
2006 void *userdata) {
2007
e67d738a
LP
2008 _cleanup_close_ int donated_fd = donate ? fd : -1;
2009 _cleanup_(source_freep) sd_event_source *s = NULL;
97ef5391
LP
2010 struct inotify_data *inotify_data = NULL;
2011 struct inode_data *inode_data = NULL;
97ef5391
LP
2012 struct stat st;
2013 int r;
2014
2015 assert_return(e, -EINVAL);
2016 assert_return(e = event_resolve(e), -ENOPKG);
e67d738a 2017 assert_return(fd >= 0, -EBADF);
97ef5391
LP
2018 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2019 assert_return(!event_pid_changed(e), -ECHILD);
2020
b9350e70
LP
2021 if (!callback)
2022 callback = inotify_exit_callback;
2023
97ef5391
LP
2024 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
2025 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD-style operations for you, hence
2026 * the caller cannot use the flag themselves. */
2027 if (mask & IN_MASK_ADD)
2028 return -EINVAL;
2029
97ef5391
LP
2030 if (fstat(fd, &st) < 0)
2031 return -errno;
2032
2033 s = source_new(e, !ret, SOURCE_INOTIFY);
2034 if (!s)
2035 return -ENOMEM;
2036
2037 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
2038 s->inotify.mask = mask;
2039 s->inotify.callback = callback;
2040 s->userdata = userdata;
2041
2042 /* Allocate an inotify object for this priority, and an inode object within it */
2043 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
2044 if (r < 0)
8c75fe17 2045 return r;
97ef5391
LP
2046
2047 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
8c75fe17 2048 if (r < 0) {
e67d738a 2049 event_gc_inotify_data(e, inotify_data);
8c75fe17
ZJS
2050 return r;
2051 }
97ef5391
LP
2052
2053 /* Keep the O_PATH fd around until the first iteration of the event loop, so that the priority of the
2054 * event source can still be changed until then, for which we need the original inode fd. */
2055 if (inode_data->fd < 0) {
e67d738a
LP
2056 if (donated_fd >= 0)
2057 inode_data->fd = TAKE_FD(donated_fd);
2058 else {
2059 inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2060 if (inode_data->fd < 0) {
2061 r = -errno;
2062 event_gc_inode_data(e, inode_data);
2063 return r;
2064 }
2065 }
2066
97ef5391
LP
2067 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
2068 }
2069
2070 /* Link our event source to the inode data object */
2071 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
2072 s->inotify.inode_data = inode_data;
2073
97ef5391
LP
2074 /* Actually realize the watch now */
2075 r = inode_data_realize_watch(e, inode_data);
2076 if (r < 0)
8c75fe17 2077 return r;
97ef5391 2078
97ef5391
LP
2079 if (ret)
2080 *ret = s;
8c75fe17 2081 TAKE_PTR(s);
97ef5391
LP
2082
2083 return 0;
97ef5391
LP
2084}
2085
e67d738a
LP
2086_public_ int sd_event_add_inotify_fd(
2087 sd_event *e,
2088 sd_event_source **ret,
2089 int fd,
2090 uint32_t mask,
2091 sd_event_inotify_handler_t callback,
2092 void *userdata) {
2093
2094 return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
2095}
2096
2097_public_ int sd_event_add_inotify(
2098 sd_event *e,
2099 sd_event_source **ret,
2100 const char *path,
2101 uint32_t mask,
2102 sd_event_inotify_handler_t callback,
2103 void *userdata) {
2104
2091c779 2105 sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */
e67d738a
LP
2106 int fd, r;
2107
2108 assert_return(path, -EINVAL);
2109
2110 fd = open(path, O_PATH|O_CLOEXEC|
2111 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
2112 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
2113 if (fd < 0)
2114 return -errno;
2115
2116 r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);
2117 if (r < 0)
2118 return r;
2119
2120 (void) sd_event_source_set_description(s, path);
2121
2122 if (ret)
2123 *ret = s;
2124
2125 return r;
2126}
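/* A minimal usage sketch (not part of this file; helper name and path are
 * illustrative, includes shown for completeness): watch a directory for
 * created/removed entries and run the event loop. */
#include <stdio.h>          /* printf */
#include <sys/inotify.h>    /* IN_CREATE, IN_DELETE */
#include "sd-event.h"

static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        printf("mask=%#x name=%s\n", ev->mask, ev->len > 0 ? ev->name : "");
        return 0;
}

static int watch_directory(const char *path) {
        _cleanup_(sd_event_unrefp) sd_event *e = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        /* Passing NULL for ret makes the source "floating", i.e. owned by the loop */
        r = sd_event_add_inotify(e, NULL, path, IN_CREATE|IN_DELETE, on_inotify, NULL);
        if (r < 0)
                return r;

        return sd_event_loop(e);
}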
2127
8301aa0b 2128static sd_event_source* event_source_free(sd_event_source *s) {
6680dd6b
LP
2129 if (!s)
2130 return NULL;
da7e457c 2131
8301aa0b
YW
2132 /* Here's a special hack: when we are called from a
2133 * dispatch handler we won't free the event source
2134 * immediately, but we will detach the fd from the
2135 * epoll. This way it is safe for the caller to unref
2136 * the event source and immediately close the fd, but
2137 * we still retain a valid event source object after
2138 * the callback. */
fd38203a 2139
8301aa0b
YW
2140 if (s->dispatching) {
2141 if (s->type == SOURCE_IO)
2142 source_io_unregister(s);
fd38203a 2143
8301aa0b
YW
2144 source_disconnect(s);
2145 } else
2146 source_free(s);
fd38203a
LP
2147
2148 return NULL;
2149}
2150
8301aa0b
YW
2151DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
2152
356779df 2153_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
f7f53e9e 2154 assert_return(s, -EINVAL);
f4b2933e 2155 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 2156
356779df 2157 return free_and_strdup(&s->description, description);
f7f53e9e
TG
2158}
2159
356779df 2160_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
f7f53e9e 2161 assert_return(s, -EINVAL);
356779df 2162 assert_return(description, -EINVAL);
f4b2933e 2163 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 2164
7d92a1a4
ZJS
2165 if (!s->description)
2166 return -ENXIO;
2167
356779df 2168 *description = s->description;
f7f53e9e
TG
2169 return 0;
2170}
2171
adcc4ca3 2172_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
305f78bf 2173 assert_return(s, NULL);
eaa3cbef
LP
2174
2175 return s->event;
2176}
2177
f7262a9f 2178_public_ int sd_event_source_get_pending(sd_event_source *s) {
305f78bf 2179 assert_return(s, -EINVAL);
6203e07a 2180 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c 2181 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2182 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2183
2184 return s->pending;
2185}
2186
f7262a9f 2187_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
305f78bf
LP
2188 assert_return(s, -EINVAL);
2189 assert_return(s->type == SOURCE_IO, -EDOM);
2190 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2191
2192 return s->io.fd;
2193}
2194
30caf8f3
LP
2195_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2196 int r;
2197
2198 assert_return(s, -EINVAL);
8ac43fee 2199 assert_return(fd >= 0, -EBADF);
30caf8f3
LP
2200 assert_return(s->type == SOURCE_IO, -EDOM);
2201 assert_return(!event_pid_changed(s->event), -ECHILD);
2202
2203 if (s->io.fd == fd)
2204 return 0;
2205
b6d5481b 2206 if (event_source_is_offline(s)) {
30caf8f3
LP
2207 s->io.fd = fd;
2208 s->io.registered = false;
2209 } else {
2210 int saved_fd;
2211
2212 saved_fd = s->io.fd;
2213 assert(s->io.registered);
2214
2215 s->io.fd = fd;
2216 s->io.registered = false;
2217
2218 r = source_io_register(s, s->enabled, s->io.events);
2219 if (r < 0) {
2220 s->io.fd = saved_fd;
2221 s->io.registered = true;
2222 return r;
2223 }
2224
5a795bff 2225 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
30caf8f3
LP
2226 }
2227
2228 return 0;
2229}
2230
ab93297c
NM
2231_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2232 assert_return(s, -EINVAL);
2233 assert_return(s->type == SOURCE_IO, -EDOM);
2234
2235 return s->io.owned;
2236}
2237
2238_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2239 assert_return(s, -EINVAL);
2240 assert_return(s->type == SOURCE_IO, -EDOM);
2241
2242 s->io.owned = own;
2243 return 0;
2244}
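/* Sketch (not part of this file): handing a socket fd over to the event loop
 * entirely, so that it is closed when the event source is freed. The helper
 * name is illustrative; the caller keeps the reference returned in *ret. */
static int add_owned_io(sd_event *e, sd_event_source **ret, int fd, sd_event_io_handler_t cb) {
        int r;

        r = sd_event_add_io(e, ret, fd, EPOLLIN, cb, NULL);
        if (r < 0)
                return r;

        return sd_event_source_set_io_fd_own(*ret, true);
}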
2245
f7262a9f 2246_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
305f78bf
LP
2247 assert_return(s, -EINVAL);
2248 assert_return(events, -EINVAL);
2249 assert_return(s->type == SOURCE_IO, -EDOM);
2250 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2251
2252 *events = s->io.events;
2253 return 0;
2254}
2255
f7262a9f 2256_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
fd38203a
LP
2257 int r;
2258
305f78bf
LP
2259 assert_return(s, -EINVAL);
2260 assert_return(s->type == SOURCE_IO, -EDOM);
2a16a986 2261 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
da7e457c 2262 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2263 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2264
b63c8d4f
DH
2265 /* edge-triggered updates are never skipped, so we can reset edges */
2266 if (s->io.events == events && !(events & EPOLLET))
fd38203a
LP
2267 return 0;
2268
2a0dc6cd
LP
2269 r = source_set_pending(s, false);
2270 if (r < 0)
2271 return r;
2272
b6d5481b 2273 if (event_source_is_online(s)) {
e4715127 2274 r = source_io_register(s, s->enabled, events);
fd38203a
LP
2275 if (r < 0)
2276 return r;
2277 }
2278
2279 s->io.events = events;
2280
2281 return 0;
2282}
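/* Sketch (not part of this file) of a common pattern built on this call:
 * subscribe to EPOLLOUT only while output is queued, and drop it again once
 * the buffer has been flushed. Helper name and flag are illustrative. */
static int update_io_events(sd_event_source *s, bool have_output) {
        return sd_event_source_set_io_events(s, EPOLLIN | (have_output ? EPOLLOUT : 0));
}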
2283
f7262a9f 2284_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
305f78bf
LP
2285 assert_return(s, -EINVAL);
2286 assert_return(revents, -EINVAL);
2287 assert_return(s->type == SOURCE_IO, -EDOM);
2288 assert_return(s->pending, -ENODATA);
2289 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2290
2291 *revents = s->io.revents;
2292 return 0;
2293}
2294
f7262a9f 2295_public_ int sd_event_source_get_signal(sd_event_source *s) {
305f78bf
LP
2296 assert_return(s, -EINVAL);
2297 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2298 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2299
2300 return s->signal.sig;
2301}
2302
31927c16 2303_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
305f78bf
LP
2304 assert_return(s, -EINVAL);
2305 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2306
6680b8d1
ME
2307 *priority = s->priority;
2308 return 0;
fd38203a
LP
2309}
2310
31927c16 2311_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
97ef5391
LP
2312 bool rm_inotify = false, rm_inode = false;
2313 struct inotify_data *new_inotify_data = NULL;
2314 struct inode_data *new_inode_data = NULL;
9da4cb2b
LP
2315 int r;
2316
305f78bf 2317 assert_return(s, -EINVAL);
da7e457c 2318 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2319 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2320
2321 if (s->priority == priority)
2322 return 0;
2323
97ef5391
LP
2324 if (s->type == SOURCE_INOTIFY) {
2325 struct inode_data *old_inode_data;
2326
2327 assert(s->inotify.inode_data);
2328 old_inode_data = s->inotify.inode_data;
2329
2330 /* We need the original fd to change the priority. If we don't have it, we can't change the priority
2331 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2332 * events we allow priority changes only until the first following iteration. */
2333 if (old_inode_data->fd < 0)
2334 return -EOPNOTSUPP;
2335
2336 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2337 if (r < 0)
2338 return r;
2339 rm_inotify = r > 0;
2340
2341 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2342 if (r < 0)
2343 goto fail;
2344 rm_inode = r > 0;
2345
2346 if (new_inode_data->fd < 0) {
2347 /* Duplicate the fd for the new inode object if we don't have any yet */
2348 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2349 if (new_inode_data->fd < 0) {
2350 r = -errno;
2351 goto fail;
2352 }
2353
2354 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2355 }
2356
2357 /* Move the event source to the new inode data structure */
2358 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2359 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2360 s->inotify.inode_data = new_inode_data;
2361
2362 /* Now create the new watch */
2363 r = inode_data_realize_watch(s->event, new_inode_data);
2364 if (r < 0) {
2365 /* Move it back */
2366 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2367 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2368 s->inotify.inode_data = old_inode_data;
2369 goto fail;
2370 }
2371
2372 s->priority = priority;
2373
2374 event_gc_inode_data(s->event, old_inode_data);
2375
b6d5481b 2376 } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
9da4cb2b
LP
2377 struct signal_data *old, *d;
2378
2379 /* Move us from the signalfd belonging to the old
2380 * priority to the signalfd of the new priority */
2381
2382 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2383
2384 s->priority = priority;
2385
2386 r = event_make_signal_data(s->event, s->signal.sig, &d);
2387 if (r < 0) {
2388 s->priority = old->priority;
2389 return r;
2390 }
2391
2392 event_unmask_signal_data(s->event, old, s->signal.sig);
2393 } else
2394 s->priority = priority;
fd38203a 2395
e1951c16 2396 event_source_pp_prioq_reshuffle(s);
fd38203a 2397
6203e07a
LP
2398 if (s->type == SOURCE_EXIT)
2399 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf 2400
fd38203a 2401 return 0;
97ef5391
LP
2402
2403fail:
2404 if (rm_inode)
2405 event_free_inode_data(s->event, new_inode_data);
2406
2407 if (rm_inotify)
2408 event_free_inotify_data(s->event, new_inotify_data);
2409
2410 return r;
fd38203a
LP
2411}
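/* Sketch (not part of this file): priorities order dispatching, smaller values
 * first. E.g. to prefer one source over sources left at the default
 * SD_EVENT_PRIORITY_NORMAL (0):
 *
 *     (void) sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IMPORTANT);
 */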
2412
cad143a8 2413_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
305f78bf 2414 assert_return(s, -EINVAL);
305f78bf 2415 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2416
cad143a8
LP
2417 if (ret)
2418 *ret = s->enabled;
2419
08c1eb0e 2420 return s->enabled != SD_EVENT_OFF;
fd38203a
LP
2421}
2422
b6d5481b
LP
2423static int event_source_offline(
2424 sd_event_source *s,
2425 int enabled,
2426 bool ratelimited) {
2427
2428 bool was_offline;
fd38203a
LP
2429 int r;
2430
ddfde737 2431 assert(s);
b6d5481b 2432 assert(enabled == SD_EVENT_OFF || ratelimited);
fd38203a 2433
ddfde737 2434 /* Unset the pending flag when this event source is disabled */
b6d5481b
LP
2435 if (s->enabled != SD_EVENT_OFF &&
2436 enabled == SD_EVENT_OFF &&
2437 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
ddfde737
LP
2438 r = source_set_pending(s, false);
2439 if (r < 0)
2440 return r;
2441 }
cc567911 2442
b6d5481b
LP
2443 was_offline = event_source_is_offline(s);
2444 s->enabled = enabled;
2445 s->ratelimited = ratelimited;
fd38203a 2446
ddfde737 2447 switch (s->type) {
fd38203a 2448
ddfde737
LP
2449 case SOURCE_IO:
2450 source_io_unregister(s);
2451 break;
ac989a78 2452
ddfde737
LP
2453 case SOURCE_SIGNAL:
2454 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2455 break;
fd38203a 2456
ddfde737 2457 case SOURCE_CHILD:
b6d5481b
LP
2458 if (!was_offline) {
2459 assert(s->event->n_online_child_sources > 0);
2460 s->event->n_online_child_sources--;
2461 }
fd38203a 2462
ddfde737
LP
2463 if (EVENT_SOURCE_WATCH_PIDFD(s))
2464 source_child_pidfd_unregister(s);
2465 else
2466 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2467 break;
4807d2d0 2468
ddfde737
LP
2469 case SOURCE_EXIT:
2470 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2471 break;
fd38203a 2472
2115b9b6
YW
2473 case SOURCE_TIME_REALTIME:
2474 case SOURCE_TIME_BOOTTIME:
2475 case SOURCE_TIME_MONOTONIC:
2476 case SOURCE_TIME_REALTIME_ALARM:
2477 case SOURCE_TIME_BOOTTIME_ALARM:
ddfde737
LP
2478 case SOURCE_DEFER:
2479 case SOURCE_POST:
2480 case SOURCE_INOTIFY:
2481 break;
fd38203a 2482
ddfde737 2483 default:
04499a70 2484 assert_not_reached();
ddfde737 2485 }
fd38203a 2486
2115b9b6
YW
2487 /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
2488 event_source_time_prioq_reshuffle(s);
2489
b6d5481b 2490 return 1;
ddfde737 2491}
f8f3f926 2492
b6d5481b
LP
2493static int event_source_online(
2494 sd_event_source *s,
2495 int enabled,
2496 bool ratelimited) {
2497
2498 bool was_online;
ddfde737 2499 int r;
fd38203a 2500
ddfde737 2501 assert(s);
b6d5481b 2502 assert(enabled != SD_EVENT_OFF || !ratelimited);
305f78bf 2503
ddfde737 2504 /* Unset the pending flag when this event source is enabled */
b6d5481b
LP
2505 if (s->enabled == SD_EVENT_OFF &&
2506 enabled != SD_EVENT_OFF &&
2507 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
ddfde737
LP
2508 r = source_set_pending(s, false);
2509 if (r < 0)
2510 return r;
2511 }
9d3e3aa5 2512
b6d5481b
LP
2513 /* Are we really ready for onlining? */
2514 if (enabled == SD_EVENT_OFF || ratelimited) {
2515 /* Nope, we are not ready for onlining, then just update the precise state and exit */
2516 s->enabled = enabled;
2517 s->ratelimited = ratelimited;
2518 return 0;
2519 }
2520
2521 was_online = event_source_is_online(s);
2522
ddfde737 2523 switch (s->type) {
ddfde737 2524 case SOURCE_IO:
b6d5481b 2525 r = source_io_register(s, enabled, s->io.events);
d2eafe61 2526 if (r < 0)
ddfde737 2527 return r;
ddfde737 2528 break;
fd38203a 2529
ddfde737
LP
2530 case SOURCE_SIGNAL:
2531 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2532 if (r < 0) {
ddfde737
LP
2533 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2534 return r;
2535 }
fd38203a 2536
ddfde737 2537 break;
fd38203a 2538
ddfde737 2539 case SOURCE_CHILD:
ddfde737
LP
2540 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2541 /* yes, we have pidfd */
9da4cb2b 2542
b6d5481b 2543 r = source_child_pidfd_register(s, enabled);
ac9f2640 2544 if (r < 0)
9da4cb2b 2545 return r;
ddfde737
LP
2546 } else {
2547 /* no pidfd, or something other than WEXITED to watch for */
9da4cb2b 2548
ddfde737
LP
2549 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2550 if (r < 0) {
ddfde737
LP
2551 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2552 return r;
2553 }
2554 }
fd38203a 2555
b6d5481b
LP
2556 if (!was_online)
2557 s->event->n_online_child_sources++;
ddfde737 2558 break;
4807d2d0 2559
d2eafe61
ZJS
2560 case SOURCE_TIME_REALTIME:
2561 case SOURCE_TIME_BOOTTIME:
2562 case SOURCE_TIME_MONOTONIC:
2563 case SOURCE_TIME_REALTIME_ALARM:
2564 case SOURCE_TIME_BOOTTIME_ALARM:
ddfde737 2565 case SOURCE_EXIT:
ddfde737
LP
2566 case SOURCE_DEFER:
2567 case SOURCE_POST:
2568 case SOURCE_INOTIFY:
2569 break;
9da4cb2b 2570
ddfde737 2571 default:
04499a70 2572 assert_not_reached();
ddfde737 2573 }
f8f3f926 2574
b6d5481b
LP
2575 s->enabled = enabled;
2576 s->ratelimited = ratelimited;
d2eafe61
ZJS
2577
2578 /* Non-failing operations below */
2115b9b6 2579 if (s->type == SOURCE_EXIT)
d2eafe61 2580 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
d2eafe61 2581
2115b9b6
YW
2582 /* Always reshuffle time prioq, as the ratelimited flag may be changed. */
2583 event_source_time_prioq_reshuffle(s);
d2eafe61 2584
b6d5481b 2585 return 1;
ddfde737
LP
2586}
2587
2588_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2589 int r;
9da4cb2b 2590
ddfde737
LP
2591 assert_return(s, -EINVAL);
2592 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2593 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2594
ddfde737
LP
2595 /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
2596 if (s->event->state == SD_EVENT_FINISHED)
2597 return m == SD_EVENT_OFF ? 0 : -ESTALE;
305f78bf 2598
ddfde737
LP
2599 if (s->enabled == m) /* No change? */
2600 return 0;
9d3e3aa5 2601
ddfde737 2602 if (m == SD_EVENT_OFF)
b6d5481b 2603 r = event_source_offline(s, m, s->ratelimited);
ddfde737
LP
2604 else {
2605 if (s->enabled != SD_EVENT_OFF) {
2606 /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
2607 * event source is already enabled after all. */
2608 s->enabled = m;
2609 return 0;
fd38203a 2610 }
ddfde737 2611
b6d5481b 2612 r = event_source_online(s, m, s->ratelimited);
fd38203a 2613 }
ddfde737
LP
2614 if (r < 0)
2615 return r;
fd38203a 2616
e1951c16 2617 event_source_pp_prioq_reshuffle(s);
fd38203a
LP
2618 return 0;
2619}
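/* Sketch (not part of this file): a SD_EVENT_ONESHOT source is switched to
 * SD_EVENT_OFF before its callback runs (see source_dispatch() below), so
 * re-enabling it from the callback makes it fire once more. Handler name is
 * illustrative. */
static int on_ready(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* ... consume the event ... */
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}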
2620
f7262a9f 2621_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
305f78bf
LP
2622 assert_return(s, -EINVAL);
2623 assert_return(usec, -EINVAL);
6a0f1f6d 2624 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf 2625 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2626
2627 *usec = s->time.next;
2628 return 0;
2629}
2630
f7262a9f 2631_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2a0dc6cd 2632 int r;
6a0f1f6d 2633
305f78bf 2634 assert_return(s, -EINVAL);
6a0f1f6d 2635 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2636 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2637 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2638
2a0dc6cd
LP
2639 r = source_set_pending(s, false);
2640 if (r < 0)
2641 return r;
2576a19e 2642
2a0dc6cd 2643 s->time.next = usec;
fd38203a 2644
e1951c16 2645 event_source_time_prioq_reshuffle(s);
fd38203a
LP
2646 return 0;
2647}
2648
d6a83dc4
LP
2649_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2650 usec_t t;
2651 int r;
2652
2653 assert_return(s, -EINVAL);
2654 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2655
2656 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2657 if (r < 0)
2658 return r;
2659
496db330
YW
2660 usec = usec_add(t, usec);
2661 if (usec == USEC_INFINITY)
d6a83dc4
LP
2662 return -EOVERFLOW;
2663
496db330 2664 return sd_event_source_set_time(s, usec);
d6a83dc4
LP
2665}
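/* Sketch (not part of this file): a simple periodic timer built on this call,
 * re-arming itself relative to the current time on each dispatch. Handler
 * name and interval are illustrative. */
static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
        /* ... periodic work ... */
        (void) sd_event_source_set_time_relative(s, 5 * USEC_PER_SEC);
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}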
2666
f7262a9f 2667_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
305f78bf
LP
2668 assert_return(s, -EINVAL);
2669 assert_return(usec, -EINVAL);
6a0f1f6d 2670 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf
LP
2671 assert_return(!event_pid_changed(s->event), -ECHILD);
2672
2673 *usec = s->time.accuracy;
2674 return 0;
2675}
2676
f7262a9f 2677_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2a0dc6cd 2678 int r;
6a0f1f6d 2679
305f78bf 2680 assert_return(s, -EINVAL);
f5fbe71d 2681 assert_return(usec != UINT64_MAX, -EINVAL);
6a0f1f6d 2682 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2683 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2684 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2685
2a0dc6cd
LP
2686 r = source_set_pending(s, false);
2687 if (r < 0)
2688 return r;
2689
eaa3cbef
LP
2690 if (usec == 0)
2691 usec = DEFAULT_ACCURACY_USEC;
2692
eaa3cbef
LP
2693 s->time.accuracy = usec;
2694
e1951c16 2695 event_source_time_prioq_reshuffle(s);
6a0f1f6d
LP
2696 return 0;
2697}
2698
2699_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2700 assert_return(s, -EINVAL);
2701 assert_return(clock, -EINVAL);
2702 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2703 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2704
6a0f1f6d 2705 *clock = event_source_type_to_clock(s->type);
eaa3cbef
LP
2706 return 0;
2707}
2708
f7262a9f 2709_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
4bee8012
LP
2710 assert_return(s, -EINVAL);
2711 assert_return(pid, -EINVAL);
2712 assert_return(s->type == SOURCE_CHILD, -EDOM);
2713 assert_return(!event_pid_changed(s->event), -ECHILD);
2714
2715 *pid = s->child.pid;
2716 return 0;
2717}
2718
f8f3f926
LP
2719_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2720 assert_return(s, -EINVAL);
2721 assert_return(s->type == SOURCE_CHILD, -EDOM);
2722 assert_return(!event_pid_changed(s->event), -ECHILD);
2723
2724 if (s->child.pidfd < 0)
2725 return -EOPNOTSUPP;
2726
2727 return s->child.pidfd;
2728}
2729
2730_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2731 assert_return(s, -EINVAL);
2732 assert_return(s->type == SOURCE_CHILD, -EDOM);
2733 assert_return(!event_pid_changed(s->event), -ECHILD);
2734 assert_return(SIGNAL_VALID(sig), -EINVAL);
2735
2736 /* If we have already seen an indication that the process exited, refuse sending a signal early. This way
2737 * we can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2738 * available. */
2739 if (s->child.exited)
2740 return -ESRCH;
2741
2742 if (s->child.pidfd >= 0) {
2743 siginfo_t copy;
2744
2745 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, hence let's copy the
2746 * structure here. */
2747 if (si)
2748 copy = *si;
2749
2750 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2751 /* Let's propagate the error only if the system call is not implemented or prohibited */
2752 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2753 return -errno;
2754 } else
2755 return 0;
2756 }
2757
2758 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2759 * this here. */
2760 if (flags != 0)
2761 return -EOPNOTSUPP;
2762
2763 if (si) {
2764 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2765 siginfo_t copy = *si;
2766
2767 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2768 return -errno;
2769 } else if (kill(s->child.pid, sig) < 0)
2770 return -errno;
2771
2772 return 0;
2773}
2774
2775_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2776 assert_return(s, -EINVAL);
2777 assert_return(s->type == SOURCE_CHILD, -EDOM);
2778
2779 if (s->child.pidfd < 0)
2780 return -EOPNOTSUPP;
2781
2782 return s->child.pidfd_owned;
2783}
2784
2785_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2786 assert_return(s, -EINVAL);
2787 assert_return(s->type == SOURCE_CHILD, -EDOM);
2788
2789 if (s->child.pidfd < 0)
2790 return -EOPNOTSUPP;
2791
2792 s->child.pidfd_owned = own;
2793 return 0;
2794}
2795
2796_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2797 assert_return(s, -EINVAL);
2798 assert_return(s->type == SOURCE_CHILD, -EDOM);
2799
2800 return s->child.process_owned;
2801}
2802
2803_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2804 assert_return(s, -EINVAL);
2805 assert_return(s->type == SOURCE_CHILD, -EDOM);
2806
2807 s->child.process_owned = own;
2808 return 0;
2809}
2810
97ef5391
LP
2811_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2812 assert_return(s, -EINVAL);
2813 assert_return(mask, -EINVAL);
2814 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2815 assert_return(!event_pid_changed(s->event), -ECHILD);
2816
2817 *mask = s->inotify.mask;
2818 return 0;
2819}
2820
718db961 2821_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
fd38203a
LP
2822 int r;
2823
da7e457c 2824 assert_return(s, -EINVAL);
6203e07a 2825 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c
LP
2826 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2827 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2828
2829 if (s->prepare == callback)
2830 return 0;
2831
2832 if (callback && s->prepare) {
2833 s->prepare = callback;
2834 return 0;
2835 }
2836
2837 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2838 if (r < 0)
2839 return r;
2840
2841 s->prepare = callback;
2842
2843 if (callback) {
2844 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2845 if (r < 0)
2846 return r;
2847 } else
2848 prioq_remove(s->event->prepare, s, &s->prepare_index);
2849
2850 return 0;
2851}
2852
f7262a9f 2853_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
da7e457c 2854 assert_return(s, NULL);
fd38203a
LP
2855
2856 return s->userdata;
2857}
2858
8f726607
LP
2859_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2860 void *ret;
2861
2862 assert_return(s, NULL);
2863
2864 ret = s->userdata;
2865 s->userdata = userdata;
2866
2867 return ret;
2868}
2869
b6d5481b
LP
2870static int event_source_enter_ratelimited(sd_event_source *s) {
2871 int r;
2872
2873 assert(s);
2874
2875 /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
2876 * the end of the rate limit time window, much as if it was a timer event source. */
2877
2878 if (s->ratelimited)
2879 return 0; /* Already ratelimited, this is a NOP hence */
2880
2881 /* Make sure we can install a CLOCK_MONOTONIC event further down. */
2882 r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
2883 if (r < 0)
2884 return r;
2885
2886 /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
2887 * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
2888 * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
2889 if (EVENT_SOURCE_IS_TIME(s->type))
2890 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2891
2892 /* Now, let's add the event source to the monotonic clock instead */
2893 r = event_source_time_prioq_put(s, &s->event->monotonic);
2894 if (r < 0)
2895 goto fail;
2896
2897 /* And let's take the event source officially offline */
2898 r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
2899 if (r < 0) {
2900 event_source_time_prioq_remove(s, &s->event->monotonic);
2901 goto fail;
2902 }
2903
2904 event_source_pp_prioq_reshuffle(s);
2905
2906 log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
2907 return 0;
2908
2909fail:
2910 /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
2911 * space for it should already be allocated. */
2912 if (EVENT_SOURCE_IS_TIME(s->type))
2913 assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);
2914
2915 return r;
2916}
2917
fd69f224 2918static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {
b6d5481b
LP
2919 int r;
2920
2921 assert(s);
2922
2923 if (!s->ratelimited)
2924 return 0;
2925
2926 /* Let's take the event source out of the monotonic prioq first. */
2927 event_source_time_prioq_remove(s, &s->event->monotonic);
2928
2929 /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
2930 if (EVENT_SOURCE_IS_TIME(s->type)) {
2931 r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
2932 if (r < 0)
2933 goto fail;
2934 }
2935
2936 /* Let's try to take it online again. */
2937 r = event_source_online(s, s->enabled, /* ratelimited= */ false);
2938 if (r < 0) {
2939 /* Do something roughly sensible when this failed: undo the two prioq ops above */
2940 if (EVENT_SOURCE_IS_TIME(s->type))
2941 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2942
2943 goto fail;
2944 }
2945
2946 event_source_pp_prioq_reshuffle(s);
2947 ratelimit_reset(&s->rate_limit);
2948
2949 log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));
fd69f224
MS
2950
2951 if (run_callback && s->ratelimit_expire_callback) {
2952 s->dispatching = true;
2953 r = s->ratelimit_expire_callback(s, s->userdata);
2954 s->dispatching = false;
2955
2956 if (r < 0) {
2957 log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
2958 strna(s->description),
2959 event_source_type_to_string(s->type),
2960 s->exit_on_failure ? "exiting" : "disabling");
2961
2962 if (s->exit_on_failure)
2963 (void) sd_event_exit(s->event, r);
2964 }
2965
2966 if (s->n_ref == 0)
2967 source_free(s);
2968 else if (r < 0)
0a040e64 2969 assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
fd69f224
MS
2970
2971 return 1;
2972 }
2973
b6d5481b
LP
2974 return 0;
2975
2976fail:
2977 /* Do something somewhat reasonable when we cannot move an event source out of ratelimited mode:
2978 * simply put it back into it; maybe we can then process it more successfully next iteration. */
2979 assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);
2980
2981 return r;
2982}
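/* A minimal sketch (not part of this file): the ratelimit state handled above
 * is configured via the public sd_event_source_set_rate_limit() call, e.g. to
 * allow at most 10 dispatches per 1s window, after which the source is taken
 * offline until process_timer() lets it leave the window again: */
static int limit_source(sd_event_source *s) {
        return sd_event_source_set_rate_limit(s, USEC_PER_SEC, 10);
}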
2983
c2ba3ad6
LP
2984static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2985 usec_t c;
2986 assert(e);
2987 assert(a <= b);
2988
2989 if (a <= 0)
2990 return 0;
393003e1
LP
2991 if (a >= USEC_INFINITY)
2992 return USEC_INFINITY;
c2ba3ad6
LP
2993
2994 if (b <= a + 1)
2995 return a;
2996
52444dc4
LP
2997 initialize_perturb(e);
2998
c2ba3ad6
LP
2999 /*
3000 Find a good time to wake up again between times a and b. We
3001 have two goals here:
3002
3003 a) We want to wake up as seldom as possible, hence prefer
3004 later times over earlier times.
3005
3006 b) But if we have to wake up, then let's make sure to
3007 dispatch as much as possible on the entire system.
3008
3009 We implement this by waking up everywhere at the same time
850516e0 3010 within any given minute if we can, synchronised via the
c2ba3ad6 3011 perturbation value determined from the boot ID. If we can't,
ba276c81
LP
3012 then we try to find the same spot in every 10s, then every 1s and
3013 then every 250ms window. Otherwise, we pick the last possible time
3014 to wake up.
c2ba3ad6
LP
3015 */
3016
850516e0
LP
3017 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
3018 if (c >= b) {
3019 if (_unlikely_(c < USEC_PER_MINUTE))
3020 return b;
3021
3022 c -= USEC_PER_MINUTE;
3023 }
3024
ba276c81
LP
3025 if (c >= a)
3026 return c;
3027
3028 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
3029 if (c >= b) {
3030 if (_unlikely_(c < USEC_PER_SEC*10))
3031 return b;
3032
3033 c -= USEC_PER_SEC*10;
3034 }
3035
850516e0
LP
3036 if (c >= a)
3037 return c;
3038
3039 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
c2ba3ad6
LP
3040 if (c >= b) {
3041 if (_unlikely_(c < USEC_PER_SEC))
3042 return b;
3043
3044 c -= USEC_PER_SEC;
3045 }
3046
3047 if (c >= a)
3048 return c;
3049
3050 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
3051 if (c >= b) {
3052 if (_unlikely_(c < USEC_PER_MSEC*250))
3053 return b;
3054
3055 c -= USEC_PER_MSEC*250;
3056 }
3057
3058 if (c >= a)
3059 return c;
3060
3061 return b;
3062}
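/* Worked example (made-up numbers): with a=12.5s, b=100s and perturb=0.3s, the
 * minute rule computes c = (b / 1min) * 1min + perturb = 60.3s. Since c < b
 * and c >= a, we return 60.3s: every event loop on this boot shares the same
 * 0.3s offset into the minute and hence wakes up coalesced. */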
3063
fd38203a
LP
3064static int event_arm_timer(
3065 sd_event *e,
6a0f1f6d 3066 struct clock_data *d) {
fd38203a
LP
3067
3068 struct itimerspec its = {};
c2ba3ad6
LP
3069 sd_event_source *a, *b;
3070 usec_t t;
fd38203a 3071
cde93897 3072 assert(e);
6a0f1f6d 3073 assert(d);
fd38203a 3074
d06441da 3075 if (!d->needs_rearm)
212bbb17 3076 return 0;
7e2bf71c
YW
3077
3078 d->needs_rearm = false;
212bbb17 3079
6a0f1f6d 3080 a = prioq_peek(d->earliest);
19947509 3081 assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
b6d5481b 3082 if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {
72aedc1e 3083
6a0f1f6d 3084 if (d->fd < 0)
c57b5ca3
LP
3085 return 0;
3086
3a43da28 3087 if (d->next == USEC_INFINITY)
72aedc1e
LP
3088 return 0;
3089
3090 /* disarm */
15c689d7
LP
3091 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3092 return -errno;
72aedc1e 3093
3a43da28 3094 d->next = USEC_INFINITY;
fd38203a 3095 return 0;
72aedc1e 3096 }
fd38203a 3097
6a0f1f6d 3098 b = prioq_peek(d->latest);
19947509
ZJS
3099 assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
3100 assert(b && b->enabled != SD_EVENT_OFF);
c2ba3ad6 3101
b6d5481b 3102 t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
6a0f1f6d 3103 if (d->next == t)
fd38203a
LP
3104 return 0;
3105
6a0f1f6d 3106 assert_se(d->fd >= 0);
fd38203a 3107
c2ba3ad6 3108 if (t == 0) {
fd38203a
LP
3109 /* We don't want to disarm here, we just mean some time looooong ago. */
3110 its.it_value.tv_sec = 0;
3111 its.it_value.tv_nsec = 1;
3112 } else
c2ba3ad6 3113 timespec_store(&its.it_value, t);
fd38203a 3114
15c689d7 3115 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
cde93897 3116 return -errno;
fd38203a 3117
6a0f1f6d 3118 d->next = t;
fd38203a
LP
3119 return 0;
3120}
3121
9a800b56 3122static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
fd38203a
LP
3123 assert(e);
3124 assert(s);
3125 assert(s->type == SOURCE_IO);
3126
9a800b56
LP
3127 /* If the event source was already pending, we just OR in the
3128 * new revents, otherwise we reset the value. The ORing is
3129 * necessary to handle EPOLLONESHOT events properly where
3130 * readability might happen independently of writability, and
3131 * we need to keep track of both */
3132
3133 if (s->pending)
3134 s->io.revents |= revents;
3135 else
3136 s->io.revents = revents;
fd38203a 3137
fd38203a
LP
3138 return source_set_pending(s, true);
3139}
3140
72aedc1e 3141static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
fd38203a
LP
3142 uint64_t x;
3143 ssize_t ss;
3144
3145 assert(e);
da7e457c 3146 assert(fd >= 0);
72aedc1e 3147
305f78bf 3148 assert_return(events == EPOLLIN, -EIO);
fd38203a
LP
3149
3150 ss = read(fd, &x, sizeof(x));
3151 if (ss < 0) {
8add30a0 3152 if (ERRNO_IS_TRANSIENT(errno))
fd38203a
LP
3153 return 0;
3154
3155 return -errno;
3156 }
3157
8d35dae7 3158 if (_unlikely_(ss != sizeof(x)))
fd38203a
LP
3159 return -EIO;
3160
cde93897 3161 if (next)
3a43da28 3162 *next = USEC_INFINITY;
72aedc1e 3163
fd38203a
LP
3164 return 0;
3165}
3166
305f78bf
LP
3167static int process_timer(
3168 sd_event *e,
3169 usec_t n,
6a0f1f6d 3170 struct clock_data *d) {
305f78bf 3171
fd38203a 3172 sd_event_source *s;
fd69f224 3173 bool callback_invoked = false;
fd38203a
LP
3174 int r;
3175
3176 assert(e);
6a0f1f6d 3177 assert(d);
fd38203a
LP
3178
3179 for (;;) {
6a0f1f6d 3180 s = prioq_peek(d->earliest);
19947509
ZJS
3181 assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
3182
b6d5481b
LP
3183 if (!s || time_event_source_next(s) > n)
3184 break;
3185
3186 if (s->ratelimited) {
3187 /* This is an event source whose ratelimit window has ended. Let's turn it on
3188 * again. */
3189 assert(s->ratelimited);
3190
fd69f224 3191 r = event_source_leave_ratelimit(s, /* run_callback */ true);
b6d5481b
LP
3192 if (r < 0)
3193 return r;
fd69f224
MS
3194 else if (r == 1)
3195 callback_invoked = true;
b6d5481b
LP
3196
3197 continue;
3198 }
3199
3200 if (s->enabled == SD_EVENT_OFF || s->pending)
fd38203a
LP
3201 break;
3202
3203 r = source_set_pending(s, true);
3204 if (r < 0)
3205 return r;
3206
e1951c16 3207 event_source_time_prioq_reshuffle(s);
fd38203a
LP
3208 }
3209
fd69f224 3210 return callback_invoked;
fd38203a
LP
3211}
3212
efd3be9d
YW
3213static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
3214 int64_t min_priority = threshold;
3215 bool something_new = false;
fd38203a 3216 sd_event_source *s;
fd38203a
LP
3217 int r;
3218
3219 assert(e);
efd3be9d
YW
3220 assert(ret_min_priority);
3221
3222 if (!e->need_process_child) {
3223 *ret_min_priority = min_priority;
3224 return 0;
3225 }
fd38203a 3226
c2ba3ad6
LP
3227 e->need_process_child = false;
3228
91c70071
YW
3229 /* So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait
3230 * for, instead of using P_ALL. This is because we only want to get child information of very
3231 * specific child processes, and not all of them. We might not have processed the SIGCHLD event
3232 * of a previous invocation and we don't want to maintain an unbounded *per-child* event queue,
3233 * hence we really don't want anything flushed out of the kernel's queue that we don't care
3234 * about. Since this is O(n) this means that if you have a lot of processes you probably want
3235 * to handle SIGCHLD yourself.
3236 *
3237 * We do not reap the children here (hence WNOWAIT); that is only done after the event
3238 * source is dispatched, so that the callback still sees the process as a zombie. */
fd38203a 3239
90e74a66 3240 HASHMAP_FOREACH(s, e->child_sources) {
fd38203a
LP
3241 assert(s->type == SOURCE_CHILD);
3242
efd3be9d
YW
3243 if (s->priority > threshold)
3244 continue;
3245
fd38203a
LP
3246 if (s->pending)
3247 continue;
3248
b6d5481b 3249 if (event_source_is_offline(s))
fd38203a
LP
3250 continue;
3251
f8f3f926
LP
3252 if (s->child.exited)
3253 continue;
3254
91c70071
YW
3255 if (EVENT_SOURCE_WATCH_PIDFD(s))
3256 /* There's a usable pidfd known for this event source? Then don't waitid() for
3257 * it here */
f8f3f926
LP
3258 continue;
3259
fd38203a 3260 zero(s->child.siginfo);
15c689d7
LP
3261 if (waitid(P_PID, s->child.pid, &s->child.siginfo,
3262 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
bfd9bfcc 3263 return negative_errno();
fd38203a
LP
3264
3265 if (s->child.siginfo.si_pid != 0) {
945c2931 3266 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 3267
f8f3f926
LP
3268 if (zombie)
3269 s->child.exited = true;
3270
08cd1552 3271 if (!zombie && (s->child.options & WEXITED)) {
91c70071
YW
3272 /* If the child isn't dead then let's immediately remove the state
3273 * change from the queue, since there's no benefit in leaving it
3274 * queued. */
08cd1552
LP
3275
3276 assert(s->child.options & (WSTOPPED|WCONTINUED));
a5d27871 3277 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
08cd1552
LP
3278 }
3279
fd38203a
LP
3280 r = source_set_pending(s, true);
3281 if (r < 0)
3282 return r;
efd3be9d
YW
3283 if (r > 0) {
3284 something_new = true;
3285 min_priority = MIN(min_priority, s->priority);
3286 }
fd38203a
LP
3287 }
3288 }
3289
efd3be9d
YW
3290 *ret_min_priority = min_priority;
3291 return something_new;
fd38203a
LP
3292}
3293
f8f3f926
LP
3294static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
3295 assert(e);
3296 assert(s);
3297 assert(s->type == SOURCE_CHILD);
3298
3299 if (s->pending)
3300 return 0;
3301
b6d5481b 3302 if (event_source_is_offline(s))
f8f3f926
LP
3303 return 0;
3304
3305 if (!EVENT_SOURCE_WATCH_PIDFD(s))
3306 return 0;
3307
3308 zero(s->child.siginfo);
3309 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
3310 return -errno;
3311
3312 if (s->child.siginfo.si_pid == 0)
3313 return 0;
3314
3315 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
3316 s->child.exited = true;
3317
3318 return source_set_pending(s, true);
3319}
3320
efd3be9d 3321static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
fd38203a
LP
3322 int r;
3323
da7e457c 3324 assert(e);
97ef5391 3325 assert(d);
305f78bf 3326 assert_return(events == EPOLLIN, -EIO);
efd3be9d 3327 assert(min_priority);
fd38203a 3328
91c70071
YW
3329 /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make
3330 * sure to recheck the children we watch. This is because we only ever dequeue the first signal
3331 * per priority, and if we dequeue one while SIGCHLD is enqueued behind it we wouldn't notice,
3332 * even though we might have higher priority children we care about, hence we need to check that
3333 * explicitly. */
9da4cb2b
LP
3334
3335 if (sigismember(&d->sigset, SIGCHLD))
3336 e->need_process_child = true;
3337
91c70071 3338 /* If there's already an event source pending for this priority we don't read another */
9da4cb2b
LP
3339 if (d->current)
3340 return 0;
3341
fd38203a 3342 for (;;) {
0eb2e0e3 3343 struct signalfd_siginfo si;
7057bd99 3344 ssize_t n;
92daebc0 3345 sd_event_source *s = NULL;
fd38203a 3346
9da4cb2b 3347 n = read(d->fd, &si, sizeof(si));
7057bd99 3348 if (n < 0) {
8add30a0 3349 if (ERRNO_IS_TRANSIENT(errno))
efd3be9d 3350 return 0;
fd38203a
LP
3351
3352 return -errno;
3353 }
3354
7057bd99 3355 if (_unlikely_(n != sizeof(si)))
fd38203a
LP
3356 return -EIO;
3357
6eb7c172 3358 assert(SIGNAL_VALID(si.ssi_signo));
7057bd99 3359
92daebc0
LP
3360 if (e->signal_sources)
3361 s = e->signal_sources[si.ssi_signo];
92daebc0
LP
3362 if (!s)
3363 continue;
9da4cb2b
LP
3364 if (s->pending)
3365 continue;
fd38203a
LP
3366
3367 s->signal.siginfo = si;
9da4cb2b
LP
3368 d->current = s;
3369
fd38203a
LP
3370 r = source_set_pending(s, true);
3371 if (r < 0)
3372 return r;
efd3be9d
YW
3373 if (r > 0 && *min_priority >= s->priority) {
3374 *min_priority = s->priority;
3375 return 1; /* an event source with smaller priority is queued. */
3376 }
9da4cb2b 3377
efd3be9d 3378 return 0;
fd38203a 3379 }
fd38203a
LP
3380}
3381
efd3be9d 3382static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
97ef5391
LP
3383 ssize_t n;
3384
3385 assert(e);
3386 assert(d);
3387
3388 assert_return(revents == EPOLLIN, -EIO);
3389
3390 /* If there's already an event source pending for this priority, don't read another */
3391 if (d->n_pending > 0)
3392 return 0;
3393
3394 /* Is the read buffer non-empty? If so, let's not read more */
3395 if (d->buffer_filled > 0)
3396 return 0;
3397
efd3be9d
YW
3398 if (d->priority > threshold)
3399 return 0;
3400
97ef5391
LP
3401 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3402 if (n < 0) {
8add30a0 3403 if (ERRNO_IS_TRANSIENT(errno))
97ef5391
LP
3404 return 0;
3405
3406 return -errno;
3407 }
3408
3409 assert(n > 0);
3410 d->buffer_filled = (size_t) n;
3411 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3412
3413 return 1;
3414}
3415
3416static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3417 assert(e);
3418 assert(d);
3419 assert(sz <= d->buffer_filled);
3420
3421 if (sz == 0)
3422 return;
3423
3424 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3425 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3426 d->buffer_filled -= sz;
3427
3428 if (d->buffer_filled == 0)
3429 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3430}
3431
3432static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3433 int r;
3434
3435 assert(e);
3436 assert(d);
3437
3438 /* If there's already an event source pending for this priority, don't read another */
3439 if (d->n_pending > 0)
3440 return 0;
3441
3442 while (d->buffer_filled > 0) {
3443 size_t sz;
3444
3445 /* Let's validate that the event structures are complete */
3446 if (d->buffer_filled < offsetof(struct inotify_event, name))
3447 return -EIO;
3448
3449 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3450 if (d->buffer_filled < sz)
3451 return -EIO;
3452
3453 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3454 struct inode_data *inode_data;
97ef5391
LP
3455
3456 /* The queue overran, let's pass this event to all event sources connected to this inotify
3457 * object */
3458
03677889 3459 HASHMAP_FOREACH(inode_data, d->inodes)
97ef5391
LP
3460 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3461
b6d5481b 3462 if (event_source_is_offline(s))
97ef5391
LP
3463 continue;
3464
3465 r = source_set_pending(s, true);
3466 if (r < 0)
3467 return r;
3468 }
97ef5391
LP
3469 } else {
3470 struct inode_data *inode_data;
97ef5391
LP
3471
3472 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3473 * our watch descriptor table. */
3474 if (d->buffer.ev.mask & IN_IGNORED) {
3475
3476 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3477 if (!inode_data) {
3478 event_inotify_data_drop(e, d, sz);
3479 continue;
3480 }
3481
3482 /* The watch descriptor was removed by the kernel, let's drop it here too */
3483 inode_data->wd = -1;
3484 } else {
3485 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3486 if (!inode_data) {
3487 event_inotify_data_drop(e, d, sz);
3488 continue;
3489 }
3490 }
3491
3492 /* Trigger all event sources that are interested in these events. Also trigger all event
3493 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3494 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3495
b6d5481b 3496 if (event_source_is_offline(s))
97ef5391
LP
3497 continue;
3498
3499 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3500 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3501 continue;
3502
3503 r = source_set_pending(s, true);
3504 if (r < 0)
3505 return r;
3506 }
3507 }
3508
3509 /* Something pending now? If so, let's finish, otherwise let's read more. */
3510 if (d->n_pending > 0)
3511 return 1;
3512 }
3513
3514 return 0;
3515}
3516
3517static int process_inotify(sd_event *e) {
97ef5391
LP
3518 int r, done = 0;
3519
3520 assert(e);
3521
3522 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3523 r = event_inotify_data_process(e, d);
3524 if (r < 0)
3525 return r;
3526 if (r > 0)
3527 done++;
3528 }
3529
3530 return done;
3531}
3532
fd38203a 3533static int source_dispatch(sd_event_source *s) {
b778cba4 3534 _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
8f5c235d 3535 EventSourceType saved_type;
fe8245eb 3536 int r = 0;
fd38203a
LP
3537
3538 assert(s);
6203e07a 3539 assert(s->pending || s->type == SOURCE_EXIT);
fd38203a 3540
b778cba4
LP
3541 /* Save the event source type here, so that we still know it after the event callback, which might
3542 * invalidate the event. */
8f5c235d
LP
3543 saved_type = s->type;
3544
de02634c 3545 /* Similarly, store a reference to the event loop object, so that we can still access it after the
b778cba4
LP
3546 * callback might have invalidated/disconnected the event source. */
3547 saved_event = sd_event_ref(s->event);
3548
de02634c 3549 /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
b6d5481b
LP
3550 assert(!s->ratelimited);
3551 if (!ratelimit_below(&s->rate_limit)) {
3552 r = event_source_enter_ratelimited(s);
3553 if (r < 0)
3554 return r;
3555
3556 return 1;
3557 }
3558
        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;

                /* If we execute a non-post source, let's mark all post sources as pending. */

                SET_FOREACH(z, s->event->post_sources) {
                        if (event_source_is_offline(z))
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                /* If the inotify callback destroys the event source then this likely means we don't need to
                 * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
                 * free it immediately, then we couldn't drop the event from the inotify event queue without
                 * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
                 * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
                 * explicitly GC it after we are done dropping the inotify event from the buffer. */
                d->n_busy++;
                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
                d->n_busy--;

                /* When no event is pending anymore on this inotify object, then let's drop the event from
                 * the inotify event queue buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                /* Now we don't want to access 'd' anymore, it's OK to GC now. */
                event_gc_inotify_data(e, d);
                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached();
        }

        s->dispatching = false;

        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
        }

        return 0;
}

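/* Prepare callbacks are installed on individual sources with sd_event_source_set_prepare() and run
 * at most once per loop iteration, just before the loop polls. A minimal sketch; the handler and
 * next_deadline() are illustrative only, not part of this library:
 *
 *     static int rearm(sd_event_source *s, void *userdata) {
 *             // e.g. recompute a timer deadline before the loop goes to sleep
 *             return sd_event_source_set_time(s, next_deadline(userdata));
 *     }
 *
 *     r = sd_event_source_set_prepare(s, rearm);
 */
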
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        assert(!p || p->type == SOURCE_EXIT);

        if (!p || event_source_is_offline(p)) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (event_source_is_offline(p))
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          usec_add(e->watchdog_last, (e->watchdog_period / 2)),
                          usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4)));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL));
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

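/* Worked example of the scheduling above (editorial note): with a watchdog period of 10s and the
 * last ping at time T, arm_watchdog() programs the timerfd to fire somewhere in [T+5s, T+7.5s],
 * and process_watchdog() suppresses pings that would come sooner than T+2.5s. Pings thus land
 * between 1/4 and 3/4 of the period after the previous one, comfortably inside the service
 * manager's deadline. */
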
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
         * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
         * compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that, if we are a default event loop, we are executed in the correct thread. We only
         * do this check here once, since gettid() is typically not cached, and we thus want to minimize
         * syscalls. */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child || !LIST_IS_EMPTY(e->inotify_data_buffered))
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

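/* Editorial summary: sd_event_prepare() above, together with sd_event_wait() and
 * sd_event_dispatch() below, forms a small state machine that callers may drive by hand instead
 * of using sd_event_run()/sd_event_loop():
 *
 *     SD_EVENT_INITIAL --sd_event_prepare()--> SD_EVENT_PENDING (ret 1) or SD_EVENT_ARMED (ret 0)
 *     SD_EVENT_ARMED   --sd_event_wait()-----> SD_EVENT_PENDING (ret 1) or SD_EVENT_INITIAL (ret 0)
 *     SD_EVENT_PENDING --sd_event_dispatch()-> SD_EVENT_RUNNING, then back to SD_EVENT_INITIAL
 *
 * The authoritative transitions are in the functions themselves. */
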
static int epoll_wait_usec(
                int fd,
                struct epoll_event *events,
                int maxevents,
                usec_t timeout) {

        int msec;
#if 0
        static bool epoll_pwait2_absent = false;
        int r;

        /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not.
         *
         * FIXME: this is temporarily disabled until epoll_pwait2() becomes more widely available.
         * See https://github.com/systemd/systemd/pull/18973 and
         * https://github.com/systemd/systemd/issues/19052. */

        if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
                r = epoll_pwait2(fd,
                                 events,
                                 maxevents,
                                 TIMESPEC_STORE(timeout),
                                 NULL);
                if (r >= 0)
                        return r;
                if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                        return -errno; /* Only fallback to old epoll_wait() if the syscall is masked or not
                                        * supported. */

                epoll_pwait2_absent = true;
        }
#endif

        if (timeout == USEC_INFINITY)
                msec = -1;
        else {
                usec_t k;

                k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
                if (k >= INT_MAX)
                        msec = INT_MAX; /* Saturate */
                else
                        msec = (int) k;
        }

        return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
}

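/* Note on the fallback path above: epoll_wait() only takes a millisecond timeout, so the
 * microsecond timeout is rounded *up* (DIV_ROUND_UP) and saturated at INT_MAX. E.g. a timeout of
 * 1500 usec becomes 2 ms, never 1 ms, which could otherwise wake us prematurely and busy-loop;
 * USEC_INFINITY maps to -1, i.e. block forever. */
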
static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
        size_t n_event_queue, m, n_event_max;
        int64_t min_priority = threshold;
        bool something_new = false;
        int r;

        assert(e);
        assert(ret_min_priority);

        n_event_queue = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
                return -ENOMEM;

        n_event_max = MALLOC_ELEMENTSOF(e->event_queue);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on them */
        if (!LIST_IS_EMPTY(e->inotify_data_buffered))
                timeout = 0;

        for (;;) {
                r = epoll_wait_usec(
                                e->epoll_fd,
                                e->event_queue,
                                n_event_max,
                                timeout);
                if (r < 0)
                        return r;

                m = (size_t) r;

                if (m < n_event_max)
                        break;

                if (n_event_max >= n_event_queue * 10)
                        break;

                if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
                        return -ENOMEM;

                n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
                timeout = 0;
        }

        /* Set the timestamp only when this is called the first time. */
        if (threshold == INT64_MAX)
                triple_timestamp_get(&e->timestamp);

        for (size_t i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
                else {
                        WakeupType *t = e->event_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                assert(s);

                                if (s->priority > threshold)
                                        continue;

                                min_priority = MIN(min_priority, s->priority);

                                switch (s->type) {

                                case SOURCE_IO:
                                        r = process_io(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_CHILD:
                                        r = process_pidfd(e, s, e->event_queue[i].events);
                                        break;

                                default:
                                        assert_not_reached();
                                }

                                break;
                        }

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                assert(d);

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
                                break;

                        default:
                                assert_not_reached();
                        }
                }
                if (r < 0)
                        return r;
                if (r > 0)
                        something_new = true;
        }

        *ret_min_priority = min_priority;
        return something_new;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        for (int64_t threshold = INT64_MAX; ; threshold--) {
                int64_t epoll_min_priority, child_min_priority;

                /* There is a possibility that new epoll (especially IO) and child events are
                 * triggered just after the process_epoll() call but before process_child(), and the new IO
                 * events may have higher priority than the child events. To salvage these events,
                 * let's call epoll_wait() again, but accept only events with higher priority than the
                 * previous ones. See issue https://github.com/systemd/systemd/issues/18190 and comments
                 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
                 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */

                r = process_epoll(e, timeout, threshold, &epoll_min_priority);
                if (r == -EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }
                if (r < 0)
                        goto finish;
                if (r == 0 && threshold < INT64_MAX)
                        /* No new epoll event. */
                        break;

                r = process_child(e, threshold, &child_min_priority);
                if (r < 0)
                        goto finish;
                if (r == 0)
                        /* No new child event. */
                        break;

                threshold = MIN(epoll_min_priority, child_min_priority);
                if (threshold == INT64_MIN)
                        break;

                timeout = 0;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;
        else if (r == 1) {
                /* The ratelimit expiry callback was called. Let's postpone processing pending sources and
                 * put the loop back into the initial state, so that sources that were potentially
                 * re-enabled by the callback are also evaluated in the next iteration.
                 *
                 * Wondering why we treat only this invocation of process_timer() differently? Once an
                 * event source is ratelimited we essentially transform it into a CLOCK_MONOTONIC timer,
                 * hence the ratelimit expiry callback is never called for any other timer type. */
                r = 0;
                goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}

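/* The delay profiling driven from sd_event_run() below keeps a histogram of log2(time between
 * iterations) in e->delays and dumps it via event_log_delays() every 5s. It is an opt-in
 * debugging aid; as far as we can tell, e->profile_delays is switched on via the
 * SD_EVENT_PROFILE_DELAYS environment variable when the loop is allocated. */
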
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run_usec != 0) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = log2u64(this_run - e->last_run_usec);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log_usec = this_run;
                }
        }

        /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run_usec = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, so let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}

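/* A single-iteration usage sketch (illustrative): to poll the loop without ever blocking, e.g.
 * when interleaving it with other work, pass a zero timeout and inspect the return value:
 *
 *     r = sd_event_run(e, 0);        // 0: nothing was ready; 1: one source was dispatched
 *     if (r < 0)
 *             return r;
 */
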
_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, UINT64_MAX);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}

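/* Minimal end-to-end usage sketch (illustrative, error handling reduced to assert_se(); signals
 * used with sd_event_add_signal() must be blocked first):
 *
 *     sd_event *e = NULL;
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
 *     assert_se(sd_event_default(&e) >= 0);
 *
 *     // A NULL handler requests loop exit when the signal arrives.
 *     assert_se(sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL) >= 0);
 *     assert_se(sd_event_add_signal(e, NULL, SIGINT, NULL, NULL) >= 0);
 *
 *     (void) sd_event_loop(e);
 *     sd_event_unref(e);
 */
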
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

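/* The returned epoll fd is what makes embedding this loop into a foreign one possible: watch the
 * fd for POLLIN, and when it fires drive one iteration by hand. A sketch under those assumptions
 * (error handling elided):
 *
 *     int fd = sd_event_get_fd(e);
 *
 *     for (;;) {
 *             r = sd_event_prepare(e);
 *             if (r == 0) {                                  // nothing pending yet, go to sleep
 *                     struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *                     (void) poll(&pfd, 1, -1);              // or the foreign loop's own wait
 *                     r = sd_event_wait(e, 0);               // collect what woke us, don't block
 *             }
 *             if (r > 0)
 *                     (void) sd_event_dispatch(e);
 *             if (sd_event_get_state(e) == SD_EVENT_FINISHED)
 *                     break;
 *     }
 */
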
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

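/* The usual way to stop a running loop is to call this from a handler; exit sources then get a
 * chance to run before sd_event_loop() returns the code. A sketch (the handler name is
 * illustrative):
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 */
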
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

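/* Usage sketch: handlers that need "now" should prefer this over now(), since within one loop
 * iteration it returns the cached timestamp taken when the events were collected, which is both
 * cheaper and consistent across all sources dispatched in that iteration:
 *
 *     uint64_t t;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &t);  // r == 0: cached; r == 1: loop never ran, fresh now()
 */
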
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

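/* Note that the default loop is bound to the thread that allocated it: the tid recorded here is
 * enforced in sd_event_prepare() above, which fails with -EREMOTEIO when a default loop is driven
 * from a different thread. */
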
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

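/* Usage sketch for services with WatchdogSec= set in their unit file (sd_watchdog_enabled()
 * reads the WATCHDOG_USEC/WATCHDOG_PID environment variables the service manager passes in):
 *
 *     r = sd_event_set_watchdog(e, true);
 *     // r > 0: watchdog pings are now sent automatically from the loop
 *     // r == 0: no watchdog was requested for this service; nothing to do
 *     // r < 0: error
 */
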
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}

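/* The destroy callback is invoked with the source's userdata when the source is freed, which
 * makes it a convenient hook for releasing that userdata. A sketch; my_handler is illustrative
 * only:
 *
 *     _cleanup_free_ char *data = strdup("payload");
 *     sd_event_source *s = NULL;
 *
 *     r = sd_event_add_defer(e, &s, my_handler, data);
 *     if (r >= 0 && sd_event_source_set_destroy_callback(s, free) >= 0)
 *             TAKE_PTR(data);  // ownership of the payload moved to the event source
 */
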
_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}

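/* A floating source is owned by the event loop rather than by the caller (note the reference
 * juggling above), which enables fire-and-forget sources. A sketch; one_shot_handler is
 * illustrative only:
 *
 *     sd_event_source *s = NULL;
 *
 *     r = sd_event_add_defer(e, &s, one_shot_handler, NULL);
 *     if (r >= 0 && sd_event_source_set_floating(s, true) >= 0)
 *             s = sd_event_source_unref(s);  // the loop now holds the only reference
 */
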
_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}

_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
        int r;

        assert_return(s, -EINVAL);

        /* Turning on ratelimiting for event source types that don't support it is a loggable offense.
         * Doing so is a programming error. */
        assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);

        /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
         * non-ratelimited. */
        r = event_source_leave_ratelimit(s, /* run_callback */ false);
        if (r < 0)
                return r;

        s->rate_limit = (RateLimit) { interval, burst };
        return 0;
}

_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
        assert_return(s, -EINVAL);

        s->ratelimit_expire_callback = callback;
        return 0;
}

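/* Usage sketch: allow a source to be dispatched at most 10 times per second; once the limit is
 * hit the source is taken offline until the interval ends, and the optional expiry callback
 * fires when it comes back online (on_ratelimit_expire is illustrative only):
 *
 *     r = sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 10);
 *     if (r >= 0)
 *             (void) sd_event_source_set_ratelimit_expire_callback(s, on_ratelimit_expire);
 */
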
_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
        assert_return(s, -EINVAL);

        /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
         * don't use assert_return(). Unlike turning on ratelimiting, it's not really a programming error. */
        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return -EDOM;

        if (!ratelimit_configured(&s->rate_limit))
                return -ENOEXEC;

        if (ret_interval)
                *ret_interval = s->rate_limit.interval;
        if (ret_burst)
                *ret_burst = s->rate_limit.burst;

        return 0;
}

_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
        assert_return(s, -EINVAL);

        if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
                return false;

        if (!ratelimit_configured(&s->rate_limit))
                return false;

        return s->ratelimited;
}